📜 Add better documentation across the compiler. (#3)
These changes pay particular attention to API endpoints, to try to ensure that any rustdocs generated are detailed and sensible. A good next step, eventually, might be to include doctest examples, as well. For the moment, it's not clear that they would provide a lot of value, though. In addition, this does a couple refactors to simplify the code base in ways that make things clearer or, at least, briefer.
This commit is contained in:
@@ -19,7 +19,7 @@ cranelift-module = "0.94.0"
|
|||||||
cranelift-native = "0.94.0"
|
cranelift-native = "0.94.0"
|
||||||
cranelift-object = "0.94.0"
|
cranelift-object = "0.94.0"
|
||||||
internment = { version = "0.7.0", default-features = false, features = ["arc"] }
|
internment = { version = "0.7.0", default-features = false, features = ["arc"] }
|
||||||
lalrpop-util = "^0.19.7"
|
lalrpop-util = "^0.20.0"
|
||||||
lazy_static = "^1.4.0"
|
lazy_static = "^1.4.0"
|
||||||
logos = "^0.12.0"
|
logos = "^0.12.0"
|
||||||
pretty = { version = "^0.11.2", features = ["termcolor"] }
|
pretty = { version = "^0.11.2", features = ["termcolor"] }
|
||||||
@@ -30,4 +30,4 @@ tempfile = "^3.5.0"
|
|||||||
thiserror = "^1.0.30"
|
thiserror = "^1.0.30"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
lalrpop = "^0.19.7"
|
lalrpop = "^0.20.0"
|
||||||
|
|||||||
@@ -1,3 +1,31 @@
|
|||||||
|
//! # The compiler backend: generation of machine code, both static and JIT.
|
||||||
|
//!
|
||||||
|
//! This module is responsible for taking our intermediate representation from
|
||||||
|
//! [`crate::ir`] and turning it into Cranelift and then into object code that
|
||||||
|
//! can either be saved to disk or run in memory. Because the runtime functions
|
||||||
|
//! for NGR are very closely tied to the compiler implementation, we also include
|
||||||
|
//! information about these functions as part of the module.
|
||||||
|
//!
|
||||||
|
//! ## Using the `Backend`
|
||||||
|
//!
|
||||||
|
//! The backend of this compiler can be used in two modes: a static compilation
|
||||||
|
//! mode, where the goal is to write the compiled object to disk and then link
|
||||||
|
//! it later, and a JIT mode, where the goal is to write the compiled object to
|
||||||
|
//! memory and then run it. Both modes use the same `Backend` object, because
|
||||||
|
//! they share a lot of behaviors. However, you'll want to use different variants
|
||||||
|
//! based on your goals:
|
||||||
|
//!
|
||||||
|
//! * Use `Backend<ObjectModule>`, constructed via [`Backend::object_file`],
|
||||||
|
//! if you want to compile to an object file on disk, which you're then going
|
||||||
|
//! to link to later.
|
||||||
|
//! * Use `Backend<JITModule>`, constructed via [`Backend::jit`], if you want
|
||||||
|
//! to do just-in-time compilation and are just going to run things immediately.
|
||||||
|
//!
|
||||||
|
//! ## Working with Runtime Functions
|
||||||
|
//!
|
||||||
|
//! For now, runtime functions are pretty easy to describe, because there's
|
||||||
|
//! only one. In the future, though, the [`RuntimeFunctions`] object is there to
|
||||||
|
//! help provide a clean interface to them all.
|
||||||
mod error;
|
mod error;
|
||||||
mod eval;
|
mod eval;
|
||||||
mod into_crane;
|
mod into_crane;
|
||||||
@@ -16,6 +44,15 @@ use target_lexicon::Triple;
|
|||||||
|
|
||||||
const EMPTY_DATUM: [u8; 8] = [0; 8];
|
const EMPTY_DATUM: [u8; 8] = [0; 8];
|
||||||
|
|
||||||
|
/// An object representing an active backend.
|
||||||
|
///
|
||||||
|
/// Internally, this object holds a bunch of state useful for compiling one
|
||||||
|
/// or more functions into an object file or memory. It can be passed around,
|
||||||
|
/// but cannot currently be duplicated because some of that state is not
|
||||||
|
/// easily duplicated. You should be able to share this across threads, assuming
|
||||||
|
/// normal Rust safety, but you should be thoughtful about transferring it across
|
||||||
|
/// processes in a JIT context due to some special cases in the runtime function
|
||||||
|
/// implementations.
|
||||||
pub struct Backend<M: Module> {
|
pub struct Backend<M: Module> {
|
||||||
pub module: M,
|
pub module: M,
|
||||||
data_ctx: DataContext,
|
data_ctx: DataContext,
|
||||||
@@ -26,6 +63,12 @@ pub struct Backend<M: Module> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Backend<JITModule> {
|
impl Backend<JITModule> {
|
||||||
|
/// Create a new JIT backend for compiling NGR into memory.
|
||||||
|
///
|
||||||
|
/// The provided output buffer is not for the compiled code, but for the output
|
||||||
|
/// of any `print` expressions that are evaluated. If set to `None`, the output
|
||||||
|
/// will be written to `stdout` as per normal, but if a String buffer is provided,
|
||||||
|
/// it will be extended by any `print` statements that happen during code execution.
|
||||||
pub fn jit(output_buffer: Option<String>) -> Result<Self, BackendError> {
|
pub fn jit(output_buffer: Option<String>) -> Result<Self, BackendError> {
|
||||||
let platform = Triple::host();
|
let platform = Triple::host();
|
||||||
let isa_builder = isa::lookup(platform.clone())?;
|
let isa_builder = isa::lookup(platform.clone())?;
|
||||||
@@ -50,12 +93,24 @@ impl Backend<JITModule> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given a compiled function ID, get a pointer to where that function was written
|
||||||
|
/// in memory.
|
||||||
|
///
|
||||||
|
/// The data at this pointer should not be mutated unless you really, really,
|
||||||
|
/// really know what you're doing. It can be run by casting it into a Rust
|
||||||
|
/// `fn() -> ()`, and then calling it from normal Rust.
|
||||||
pub fn bytes(&self, function_id: FuncId) -> *const u8 {
|
pub fn bytes(&self, function_id: FuncId) -> *const u8 {
|
||||||
self.module.get_finalized_function(function_id)
|
self.module.get_finalized_function(function_id)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Backend<ObjectModule> {
|
impl Backend<ObjectModule> {
|
||||||
|
/// Generate a backend for compiling into an object file for the given target.
|
||||||
|
///
|
||||||
|
/// This backend will generate a single output file per `Backend` object, although
|
||||||
|
/// that file may have multiple functions defined within it. Data between those
|
||||||
|
/// functions (in particular, strings) will be defined once and shared between
|
||||||
|
/// the different functions.
|
||||||
pub fn object_file(platform: Triple) -> Result<Self, BackendError> {
|
pub fn object_file(platform: Triple) -> Result<Self, BackendError> {
|
||||||
let isa_builder = isa::lookup(platform.clone())?;
|
let isa_builder = isa::lookup(platform.clone())?;
|
||||||
let mut settings_builder = settings::builder();
|
let mut settings_builder = settings::builder();
|
||||||
@@ -76,12 +131,22 @@ impl Backend<ObjectModule> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given all the functions defined, return the bytes the object file should contain.
|
||||||
pub fn bytes(self) -> Result<Vec<u8>, BackendError> {
|
pub fn bytes(self) -> Result<Vec<u8>, BackendError> {
|
||||||
self.module.finish().emit().map_err(Into::into)
|
self.module.finish().emit().map_err(Into::into)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<M: Module> Backend<M> {
|
impl<M: Module> Backend<M> {
|
||||||
|
/// Define a string within the current backend.
|
||||||
|
///
|
||||||
|
/// Note that this is a Cranelift [`DataId`], which then must be redeclared inside the
|
||||||
|
/// context of any functions or data items that want to use it. That being said, the
|
||||||
|
/// string value will be defined once in the file and then shared by all referencers.
|
||||||
|
///
|
||||||
|
/// This function will automatically add a null character (`'\0'`) to the end of the
|
||||||
|
/// string, to ensure that strings are null-terminated for interactions with other
|
||||||
|
/// languages.
|
||||||
pub fn define_string(&mut self, s: &str) -> Result<DataId, BackendError> {
|
pub fn define_string(&mut self, s: &str) -> Result<DataId, BackendError> {
|
||||||
let name = format!("<string_constant>{}", s);
|
let name = format!("<string_constant>{}", s);
|
||||||
let s0 = format!("{}\0", s);
|
let s0 = format!("{}\0", s);
|
||||||
@@ -97,6 +162,11 @@ impl<M: Module> Backend<M> {
|
|||||||
Ok(global_id)
|
Ok(global_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Define a global variable within the current backend.
|
||||||
|
///
|
||||||
|
/// These variables can be shared between functions, and will be exported from the
|
||||||
|
/// module itself as public data in the case of static compilation. Their initial
|
||||||
|
/// value will be null.
|
||||||
pub fn define_variable(&mut self, name: String) -> Result<DataId, BackendError> {
|
pub fn define_variable(&mut self, name: String) -> Result<DataId, BackendError> {
|
||||||
self.data_ctx.define(Box::new(EMPTY_DATUM));
|
self.data_ctx.define(Box::new(EMPTY_DATUM));
|
||||||
let id = self
|
let id = self
|
||||||
@@ -108,6 +178,11 @@ impl<M: Module> Backend<M> {
|
|||||||
Ok(id)
|
Ok(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get a pointer to the output buffer for `print`ing, or `null`.
|
||||||
|
///
|
||||||
|
/// As suggested, returns `null` in the case where the user has not provided an
|
||||||
|
/// output buffer; it is your responsibility to check for this case and do
|
||||||
|
/// something sensible.
|
||||||
pub fn output_buffer_ptr(&mut self) -> *mut String {
|
pub fn output_buffer_ptr(&mut self) -> *mut String {
|
||||||
if let Some(str) = self.output_buffer.as_mut() {
|
if let Some(str) = self.output_buffer.as_mut() {
|
||||||
str as *mut String
|
str as *mut String
|
||||||
@@ -116,6 +191,10 @@ impl<M: Module> Backend<M> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get any captured output `print`ed by the program during execution.
|
||||||
|
///
|
||||||
|
/// If an output buffer was not provided, or if the program has not done any
|
||||||
|
/// printing, then this function will return an empty string.
|
||||||
pub fn output(self) -> String {
|
pub fn output(self) -> String {
|
||||||
if let Some(s) = self.output_buffer {
|
if let Some(s) = self.output_buffer {
|
||||||
s
|
s
|
||||||
|
|||||||
@@ -2,8 +2,27 @@ use crate::backend::runtime::RuntimeFunctionError;
|
|||||||
use codespan_reporting::diagnostic::Diagnostic;
|
use codespan_reporting::diagnostic::Diagnostic;
|
||||||
use cranelift_codegen::{isa::LookupError, settings::SetError, CodegenError};
|
use cranelift_codegen::{isa::LookupError, settings::SetError, CodegenError};
|
||||||
use cranelift_module::ModuleError;
|
use cranelift_module::ModuleError;
|
||||||
|
use internment::ArcIntern;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
|
/// An error in the translation to a backend (either the JIT or the static compiler).
|
||||||
|
///
|
||||||
|
/// In general, this is just a nice summary error type for a bunch of downstream
|
||||||
|
/// errors; the exceptions are internal errors from builtin functions or variable
|
||||||
|
/// lookups.
|
||||||
|
///
|
||||||
|
/// Unlike some other errors in the system, the translation to a `Diagnostic` does
|
||||||
|
/// not necessarily provide a whole lot of value, because we have lost most of the
|
||||||
|
/// source information by the time we're generating these errors. That being said,
|
||||||
|
/// people who want to provide nicer error messages might consider using the
|
||||||
|
/// translation through `Diagnostic` anyways, just in case we add more information
|
||||||
|
/// in the future.
|
||||||
|
///
|
||||||
|
/// Finally, the `PartialEq` for this type is a bit fuzzy. In some cases, it
|
||||||
|
/// ensures that the errors match exactly. In other cases, though, it just checks to
|
||||||
|
/// see if the two errors are of the same class; e.g., it will return true if both
|
||||||
|
/// errors are `BackendError::CodegenError`, regardless of what the specific
|
||||||
|
/// `CodegenError` is.
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum BackendError {
|
pub enum BackendError {
|
||||||
#[error("Cranelift module error: {0}")]
|
#[error("Cranelift module error: {0}")]
|
||||||
@@ -11,7 +30,7 @@ pub enum BackendError {
|
|||||||
#[error("Builtin function error: {0}")]
|
#[error("Builtin function error: {0}")]
|
||||||
BuiltinError(#[from] RuntimeFunctionError),
|
BuiltinError(#[from] RuntimeFunctionError),
|
||||||
#[error("Internal variable lookup error")]
|
#[error("Internal variable lookup error")]
|
||||||
VariableLookupFailure,
|
VariableLookupFailure(ArcIntern<String>),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
CodegenError(#[from] CodegenError),
|
CodegenError(#[from] CodegenError),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
@@ -31,9 +50,8 @@ impl From<BackendError> for Diagnostic<usize> {
|
|||||||
BackendError::BuiltinError(me) => {
|
BackendError::BuiltinError(me) => {
|
||||||
Diagnostic::error().with_message(format!("Internal runtime function error: {}", me))
|
Diagnostic::error().with_message(format!("Internal runtime function error: {}", me))
|
||||||
}
|
}
|
||||||
BackendError::VariableLookupFailure => {
|
BackendError::VariableLookupFailure(x) => Diagnostic::error()
|
||||||
Diagnostic::error().with_message("Internal variable lookup error!")
|
.with_message(format!("Internal variable lookup error for {}", x)),
|
||||||
}
|
|
||||||
BackendError::CodegenError(me) => {
|
BackendError::CodegenError(me) => {
|
||||||
Diagnostic::error().with_message(format!("Internal codegen error: {}", me))
|
Diagnostic::error().with_message(format!("Internal codegen error: {}", me))
|
||||||
}
|
}
|
||||||
@@ -58,8 +76,12 @@ impl PartialEq for BackendError {
|
|||||||
_ => false,
|
_ => false,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// because the underlying `CodegenError` doesn't implement `PartialEq`,
|
||||||
|
// we just check that they're both `CodegenError`s.
|
||||||
BackendError::CodegenError(_) => matches!(other, BackendError::CodegenError(_)),
|
BackendError::CodegenError(_) => matches!(other, BackendError::CodegenError(_)),
|
||||||
|
|
||||||
|
// because the underlying `ModuleError` doesn't implement `PartialEq`,
|
||||||
|
// we just check that they're both `Cranelift`s.
|
||||||
BackendError::Cranelift(_) => matches!(other, BackendError::Cranelift(_)),
|
BackendError::Cranelift(_) => matches!(other, BackendError::Cranelift(_)),
|
||||||
|
|
||||||
BackendError::LookupError(a) => match other {
|
BackendError::LookupError(a) => match other {
|
||||||
@@ -72,7 +94,10 @@ impl PartialEq for BackendError {
|
|||||||
_ => false,
|
_ => false,
|
||||||
},
|
},
|
||||||
|
|
||||||
BackendError::VariableLookupFailure => other == &BackendError::VariableLookupFailure,
|
BackendError::VariableLookupFailure(a) => match other {
|
||||||
|
BackendError::VariableLookupFailure(b) => a == b,
|
||||||
|
_ => false,
|
||||||
|
},
|
||||||
|
|
||||||
BackendError::Write(a) => match other {
|
BackendError::Write(a) => match other {
|
||||||
BackendError::Write(b) => a == b,
|
BackendError::Write(b) => a == b,
|
||||||
|
|||||||
@@ -8,6 +8,19 @@ use cranelift_object::ObjectModule;
|
|||||||
use target_lexicon::Triple;
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
impl Backend<JITModule> {
|
impl Backend<JITModule> {
|
||||||
|
/// Evaluate the given IR, returning the output it prints.
|
||||||
|
///
|
||||||
|
/// This builds and executes the program using the JIT backend, using a fresh JIT runtime
|
||||||
|
/// that should be independent of any other runtimes being executed. As such, it should be
|
||||||
|
/// impossible for a program being executed by this function to interact with another, parallel
|
||||||
|
/// execution of the function. If you actually want them to interact, you'll need to combine
|
||||||
|
/// them into the same `Program` before execution.
|
||||||
|
///
|
||||||
|
/// One important note: The runtime used by this function does not currently implement
|
||||||
|
/// overflow/underflow erroring the same way that other evaluation functions within this
|
||||||
|
/// library do. So, if you're validating equivalence between them, you'll want to weed
|
||||||
|
/// out examples that overflow/underflow before checking equivalence. (This is the behavior
|
||||||
|
/// of the built-in test systems.)
|
||||||
pub fn eval(program: Program) -> Result<String, EvalError> {
|
pub fn eval(program: Program) -> Result<String, EvalError> {
|
||||||
let mut jitter = Backend::jit(Some(String::new()))?;
|
let mut jitter = Backend::jit(Some(String::new()))?;
|
||||||
let function_id = jitter.compile_function("test", program)?;
|
let function_id = jitter.compile_function("test", program)?;
|
||||||
@@ -20,6 +33,20 @@ impl Backend<JITModule> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Backend<ObjectModule> {
|
impl Backend<ObjectModule> {
|
||||||
|
/// Evaluate the given IR, returning the output it prints.
|
||||||
|
///
|
||||||
|
/// This builds the program as a standalone object in a temporary directory, and then links
|
||||||
|
/// and runs it using the provided runtime system (see `CARGO_MANIFEST_DIR/runtime/`). To
|
||||||
|
/// do so, it assumes that there is a version of `clang` available in the current PATH.
|
||||||
|
///
|
||||||
|
/// This routine is regularly tested under Windows, Mac, and Linux, and should work across
|
||||||
|
/// other platforms that support `clang`.
|
||||||
|
///
|
||||||
|
/// One important note: The runtime used by this function does not currently implement
|
||||||
|
/// overflow/underflow erroring the same way that other evaluation functions within this
|
||||||
|
/// library do. So, if you're validating equivalence between them, you'll want to weed
|
||||||
|
/// out examples that overflow/underflow before checking equivalence. (This is the behavior
|
||||||
|
/// of the built-in test systems.)
|
||||||
pub fn eval(program: Program) -> Result<String, EvalError> {
|
pub fn eval(program: Program) -> Result<String, EvalError> {
|
||||||
//use pretty::{Arena, Pretty};
|
//use pretty::{Arena, Pretty};
|
||||||
//let allocator = Arena::<()>::new();
|
//let allocator = Arena::<()>::new();
|
||||||
@@ -40,18 +67,26 @@ impl Backend<ObjectModule> {
|
|||||||
if output.status.success() {
|
if output.status.success() {
|
||||||
Ok(std::string::String::from_utf8_lossy(&output.stdout).to_string())
|
Ok(std::string::String::from_utf8_lossy(&output.stdout).to_string())
|
||||||
} else {
|
} else {
|
||||||
Err(EvalError::IO(format!(
|
Err(EvalError::ExitCode(output.status))
|
||||||
"Exitted with error code {}",
|
|
||||||
output.status
|
|
||||||
)))
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Err(EvalError::IO(
|
Err(EvalError::RuntimeOutput(
|
||||||
std::string::String::from_utf8_lossy(&output.stderr).to_string(),
|
std::string::String::from_utf8_lossy(&output.stderr).to_string(),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Link the generated object into an executable.
|
||||||
|
///
|
||||||
|
/// Currently, our runtime system is a single file, and ends up being the file
|
||||||
|
/// that includes `main`. (It then calls the `gogogo` function which serves as the
|
||||||
|
/// entry point for our compiled code.) This function thus just uses `clang` to
|
||||||
|
/// compile the C file with the generated object file to produce the executable.
|
||||||
|
/// Conveniently, `clang` also sets execute permissions under unix-like file systems.
|
||||||
|
///
|
||||||
|
/// This function assumes that this compilation and linking should run without any
|
||||||
|
/// output, so changes to the RTS should make 100% sure that they do not generate
|
||||||
|
/// any compiler warnings.
|
||||||
fn link(object_file: &Path, executable_path: &Path) -> Result<(), EvalError> {
|
fn link(object_file: &Path, executable_path: &Path) -> Result<(), EvalError> {
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
@@ -67,7 +102,7 @@ impl Backend<ObjectModule> {
|
|||||||
.output()?;
|
.output()?;
|
||||||
|
|
||||||
if !output.stderr.is_empty() {
|
if !output.stderr.is_empty() {
|
||||||
return Err(EvalError::IO(
|
return Err(EvalError::Linker(
|
||||||
std::string::String::from_utf8_lossy(&output.stderr).to_string(),
|
std::string::String::from_utf8_lossy(&output.stderr).to_string(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
@@ -77,12 +112,17 @@ impl Backend<ObjectModule> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
proptest::proptest! {
|
proptest::proptest! {
|
||||||
|
// This is the obvious test to make sure that our static compilation path works
|
||||||
|
// without error, assuming any possible input ... well, any possible input that
|
||||||
|
// doesn't involve overflow or underflow.
|
||||||
#[test]
|
#[test]
|
||||||
fn file_backend_works(program: Program) {
|
fn static_backend(program: Program) {
|
||||||
use crate::eval::PrimOpError;
|
use crate::eval::PrimOpError;
|
||||||
|
|
||||||
let basic_result = program.eval();
|
let basic_result = program.eval();
|
||||||
|
|
||||||
|
// windows `printf` is going to terminate lines with "\r\n", so we need to adjust
|
||||||
|
// our test result here.
|
||||||
#[cfg(target_family="windows")]
|
#[cfg(target_family="windows")]
|
||||||
let basic_result = basic_result.map(|x| x.replace('\n', "\r\n"));
|
let basic_result = basic_result.map(|x| x.replace('\n', "\r\n"));
|
||||||
|
|
||||||
@@ -92,8 +132,11 @@ proptest::proptest! {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is the obvious test to make sure that our JIT compilation path works
|
||||||
|
// without error, assuming any possible input ... well, any possible input that
|
||||||
|
// doesn't involve overflow or underflow.
|
||||||
#[test]
|
#[test]
|
||||||
fn jit_backend_works(program: Program) {
|
fn jit_backend(program: Program) {
|
||||||
use crate::eval::PrimOpError;
|
use crate::eval::PrimOpError;
|
||||||
|
|
||||||
let basic_result = program.eval();
|
let basic_result = program.eval();
|
||||||
|
|||||||
@@ -8,15 +8,31 @@ use cranelift_codegen::ir::{
|
|||||||
use cranelift_codegen::isa::CallConv;
|
use cranelift_codegen::isa::CallConv;
|
||||||
use cranelift_codegen::Context;
|
use cranelift_codegen::Context;
|
||||||
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
|
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
|
||||||
use cranelift_module::{FuncId, Linkage, Module, ModuleError};
|
use cranelift_module::{FuncId, Linkage, Module};
|
||||||
use internment::ArcIntern;
|
use internment::ArcIntern;
|
||||||
|
|
||||||
use crate::backend::error::BackendError;
|
use crate::backend::error::BackendError;
|
||||||
use crate::backend::Backend;
|
use crate::backend::Backend;
|
||||||
|
|
||||||
|
/// When we're compiling, we might need to reference some of the strings built into
|
||||||
|
/// the source code; to do so, we need a `GlobalValue`. Perhaps unexpectedly, given
|
||||||
|
/// the name, `GlobalValue`s are specific to a single function we're compiling, so
|
||||||
|
/// we end up computing this table for every function.
|
||||||
|
///
|
||||||
|
/// This is just a handy type alias to avoid a lot of confusion in the functions.
|
||||||
type StringTable = HashMap<ArcIntern<String>, GlobalValue>;
|
type StringTable = HashMap<ArcIntern<String>, GlobalValue>;
|
||||||
|
|
||||||
impl<M: Module> Backend<M> {
|
impl<M: Module> Backend<M> {
|
||||||
|
/// Compile the given `Program` into a function with the given name.
|
||||||
|
///
|
||||||
|
/// At some point, the use of `Program` is going to change; however, for the
|
||||||
|
/// moment, we have no notion of a function in our language so the whole input
|
||||||
|
/// is converted into a single output function. The type of the generated
|
||||||
|
/// function is, essentially, `fn() -> ()`: it takes no arguments and returns
|
||||||
|
/// no value.
|
||||||
|
///
|
||||||
|
/// The function provided can then be either written to a file (if using a
|
||||||
|
/// static Cranelift backend) or executed directly (if using the Cranelift JIT).
|
||||||
pub fn compile_function(
|
pub fn compile_function(
|
||||||
&mut self,
|
&mut self,
|
||||||
function_name: &str,
|
function_name: &str,
|
||||||
@@ -28,21 +44,47 @@ impl<M: Module> Backend<M> {
|
|||||||
call_conv: CallConv::SystemV,
|
call_conv: CallConv::SystemV,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// this generates the handle for the function that we'll eventually want to
|
||||||
|
// return to the user. For now, we declare all functions defined by this
|
||||||
|
// function as public/global/exported, although we may want to reconsider
|
||||||
|
// this decision later.
|
||||||
let func_id =
|
let func_id =
|
||||||
self.module
|
self.module
|
||||||
.declare_function(function_name, Linkage::Export, &basic_signature)?;
|
.declare_function(function_name, Linkage::Export, &basic_signature)?;
|
||||||
let mut ctx = Context::new();
|
|
||||||
ctx.func =
|
|
||||||
Function::with_name_signature(UserFuncName::user(0, func_id.as_u32()), basic_signature);
|
|
||||||
|
|
||||||
|
// Next we have to generate the compilation context for the rest of this
|
||||||
|
// function. Currently, we generate a fresh context for every function.
|
||||||
|
// Since we're only generating one function per `Program`, this makes
|
||||||
|
// complete sense. However, in the future, we may want to revisit this
|
||||||
|
// decision.
|
||||||
|
let mut ctx = Context::new();
|
||||||
|
let user_func_name = UserFuncName::user(0, func_id.as_u32());
|
||||||
|
ctx.func = Function::with_name_signature(user_func_name, basic_signature);
|
||||||
|
|
||||||
|
// We generate a table of every string that we use in the program, here.
|
||||||
|
// Cranelift is going to require us to have this in a particular structure
|
||||||
|
// (`GlobalValue`) so that we can reference them later, and it's going to
|
||||||
|
// be tricky to generate those on the fly. So we just generate the set we
|
||||||
|
// need here, and then have it around in the table for later.
|
||||||
let string_table = self.build_string_table(&mut ctx.func, &program)?;
|
let string_table = self.build_string_table(&mut ctx.func, &program)?;
|
||||||
let mut variable_table = HashMap::new();
|
|
||||||
let mut next_var_num = 1;
|
// In the future, we might want to see what runtime functions the function
|
||||||
|
// we were given uses, and then only include those functions that we care
|
||||||
|
// about. Presumably, we'd use some sort of lookup table like we do for
|
||||||
|
// strings. But for now, we only have one runtime function, and we're pretty
|
||||||
|
// sure we're always going to use it, so we just declare it (and reference
|
||||||
|
// it) directly.
|
||||||
let print_func_ref = self.runtime_functions.include_runtime_function(
|
let print_func_ref = self.runtime_functions.include_runtime_function(
|
||||||
"print",
|
"print",
|
||||||
&mut self.module,
|
&mut self.module,
|
||||||
&mut ctx.func,
|
&mut ctx.func,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// In the case of the JIT, there may be symbols we've already defined outside
|
||||||
|
// the context of this particular `Program`, which we might want to reference.
|
||||||
|
// Just like with strings, generating the `GlobalValue`s we need can potentially
|
||||||
|
// be a little tricky to do on the fly, so we generate the complete list right
|
||||||
|
// here and then use it later.
|
||||||
let pre_defined_symbols: HashMap<String, GlobalValue> = self
|
let pre_defined_symbols: HashMap<String, GlobalValue> = self
|
||||||
.defined_symbols
|
.defined_symbols
|
||||||
.iter()
|
.iter()
|
||||||
@@ -52,67 +94,88 @@ impl<M: Module> Backend<M> {
|
|||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
// The last table we're going to need is our local variable table, to store
|
||||||
|
// variables used in this `Program` but not used outside of it. For whatever
|
||||||
|
// reason, Cranelift requires us to generate unique indexes for each of our
|
||||||
|
// variables; we just use a simple incrementing counter for that.
|
||||||
|
let mut variable_table = HashMap::new();
|
||||||
|
let mut next_var_num = 1;
|
||||||
|
|
||||||
|
// Finally (!), we generate the function builder that we're going to use to
|
||||||
|
// make this function!
|
||||||
let mut fctx = FunctionBuilderContext::new();
|
let mut fctx = FunctionBuilderContext::new();
|
||||||
let mut builder = FunctionBuilder::new(&mut ctx.func, &mut fctx);
|
let mut builder = FunctionBuilder::new(&mut ctx.func, &mut fctx);
|
||||||
|
|
||||||
|
// Make the initial block to put instructions in. Later, when we have control
|
||||||
|
// flow, we might add more blocks after this one. But, for now, we only have
|
||||||
|
// the one block.
|
||||||
let main_block = builder.create_block();
|
let main_block = builder.create_block();
|
||||||
builder.switch_to_block(main_block);
|
builder.switch_to_block(main_block);
|
||||||
|
|
||||||
|
// Compiling a function is just compiling each of the statements in order.
|
||||||
|
// At the moment, we do the pattern match for statements here, and then
|
||||||
|
// directly compile the statements. If/when we add more statement forms,
|
||||||
|
// this is likely to become more cumbersome, and we'll want to separate
|
||||||
|
// these off. But for now, given the number of tables we keep around to track
|
||||||
|
// state, it's easier to just include them.
|
||||||
for stmt in program.statements.drain(..) {
|
for stmt in program.statements.drain(..) {
|
||||||
match stmt {
|
match stmt {
|
||||||
|
// Print statements are fairly easy to compile: we just lookup the
|
||||||
|
// output buffer, the address of the string to print, and the value
|
||||||
|
// of whatever variable we're printing. Then we just call print.
|
||||||
Statement::Print(ann, var) => {
|
Statement::Print(ann, var) => {
|
||||||
|
// Get the output buffer (or null) from our general compilation context.
|
||||||
let buffer_ptr = self.output_buffer_ptr();
|
let buffer_ptr = self.output_buffer_ptr();
|
||||||
let buffer_ptr = builder.ins().iconst(types::I64, buffer_ptr as i64);
|
let buffer_ptr = builder.ins().iconst(types::I64, buffer_ptr as i64);
|
||||||
|
|
||||||
|
// Get a reference to the string we want to print.
|
||||||
let local_name_ref = string_table.get(&var).unwrap();
|
let local_name_ref = string_table.get(&var).unwrap();
|
||||||
let name_ptr = builder.ins().symbol_value(types::I64, *local_name_ref);
|
let name_ptr = builder.ins().symbol_value(types::I64, *local_name_ref);
|
||||||
let val = ValueOrRef::Ref(ann, var).into_cranelift(
|
|
||||||
|
// Look up the value for the variable. Because this might be a
|
||||||
|
// global variable (and that requires special logic), we just turn
|
||||||
|
// this into an `Expression` and re-use the logic in that implementation.
|
||||||
|
let val = Expression::Reference(ann, var).into_crane(
|
||||||
&mut builder,
|
&mut builder,
|
||||||
&variable_table,
|
&variable_table,
|
||||||
&pre_defined_symbols,
|
&pre_defined_symbols,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// Finally, we can generate the call to print.
|
||||||
builder
|
builder
|
||||||
.ins()
|
.ins()
|
||||||
.call(print_func_ref, &[buffer_ptr, name_ptr, val]);
|
.call(print_func_ref, &[buffer_ptr, name_ptr, val]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
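// Variable binding is a little more complicated, because the variable being
// bound may be either a local variable or a pre-defined global one.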
|
||||||
Statement::Binding(_, var_name, value) => {
|
Statement::Binding(_, var_name, value) => {
|
||||||
let val = match value {
|
// Kick off to the `Expression` implementation to see what value we're going
|
||||||
Expression::Value(_, Value::Number(_, v)) => {
|
// to bind to this variable.
|
||||||
builder.ins().iconst(types::I64, v)
|
let val =
|
||||||
}
|
value.into_crane(&mut builder, &variable_table, &pre_defined_symbols)?;
|
||||||
|
|
||||||
Expression::Reference(_, name) => {
|
|
||||||
let value_var_num = variable_table.get(&name).unwrap();
|
|
||||||
builder.use_var(Variable::new(*value_var_num))
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::Primitive(_, prim, mut vals) => {
|
|
||||||
let right = vals.pop().unwrap().into_cranelift(
|
|
||||||
&mut builder,
|
|
||||||
&variable_table,
|
|
||||||
&pre_defined_symbols,
|
|
||||||
)?;
|
|
||||||
let left = vals.pop().unwrap().into_cranelift(
|
|
||||||
&mut builder,
|
|
||||||
&variable_table,
|
|
||||||
&pre_defined_symbols,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
match prim {
|
|
||||||
Primitive::Plus => builder.ins().iadd(left, right),
|
|
||||||
Primitive::Minus => builder.ins().isub(left, right),
|
|
||||||
Primitive::Times => builder.ins().imul(left, right),
|
|
||||||
Primitive::Divide => builder.ins().sdiv(left, right),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
// Now the question is: is this a local variable, or a global one?
|
||||||
if let Some(global_id) = pre_defined_symbols.get(var_name.as_str()) {
|
if let Some(global_id) = pre_defined_symbols.get(var_name.as_str()) {
|
||||||
|
// It's a global variable! In this case, we assume that someone has already
|
||||||
|
// dedicated some space in memory to store this value. We look this location
|
||||||
|
// up, and then tell Cranelift to store the value there.
|
||||||
let val_ptr = builder.ins().symbol_value(types::I64, *global_id);
|
let val_ptr = builder.ins().symbol_value(types::I64, *global_id);
|
||||||
builder.ins().store(MemFlags::new(), val, val_ptr, 0);
|
builder.ins().store(MemFlags::new(), val, val_ptr, 0);
|
||||||
} else {
|
} else {
|
||||||
|
// It's a local variable! In this case, we need to allocate a new Cranelift
|
||||||
|
// `Variable` for this variable, which we do using our `next_var_num` counter.
|
||||||
|
// (While we're doing this, we also increment `next_var_num`, so that we get
|
||||||
|
// a fresh `Variable` next time. This is one of those very narrow cases in which
|
||||||
|
// I wish Rust had an increment expression.)
|
||||||
let var = Variable::new(next_var_num);
|
let var = Variable::new(next_var_num);
|
||||||
variable_table.insert(var_name, next_var_num);
|
|
||||||
next_var_num += 1;
|
next_var_num += 1;
|
||||||
|
|
||||||
|
// We can add the variable directly to our local variable map; it's `Copy`.
|
||||||
|
variable_table.insert(var_name, var);
|
||||||
|
|
||||||
|
// Now we tell Cranelift about our new variable, which has type I64 because
|
||||||
|
// everything we have at this point is of type I64. Once it's declared, we
|
||||||
|
// define it as having the value we computed above.
|
||||||
builder.declare_var(var, types::I64);
|
builder.declare_var(var, types::I64);
|
||||||
builder.def_var(var, val);
|
builder.def_var(var, val);
|
||||||
}
|
}
|
||||||
@@ -120,15 +183,30 @@ impl<M: Module> Backend<M> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Now that we're done, inject a return instruction (one with no actual value; basically
|
||||||
|
// the equivalent of Rust's `return;`). We then seal the block (which lets Cranelift
|
||||||
|
// know that the block is done), and then finalize the function (which lets Cranelift
|
||||||
|
// know we're done with the function).
|
||||||
builder.ins().return_(&[]);
|
builder.ins().return_(&[]);
|
||||||
builder.seal_block(main_block);
|
builder.seal_block(main_block);
|
||||||
builder.finalize();
|
builder.finalize();
|
||||||
|
|
||||||
|
// This is a little odd. We want to tell the rest of Cranelift about this function,
|
||||||
|
// so we register it using the function ID and our builder context. However, the
|
||||||
|
// result of this function isn't actually super helpful. So we ignore it, unless
|
||||||
|
// it's an error.
|
||||||
let _ = self.module.define_function(func_id, &mut ctx)?;
|
let _ = self.module.define_function(func_id, &mut ctx)?;
|
||||||
|
|
||||||
|
// done!
|
||||||
Ok(func_id)
|
Ok(func_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build the string table for use in referencing strings later.
|
||||||
|
//
|
||||||
|
// This function is slightly smart, in that it only puts strings in the table that
|
||||||
|
// are used by the `Program`. (Thanks to `Program::strings()`!) If the strings have
|
||||||
|
// been declared globally, via `Backend::define_string()`, we will re-use that data.
|
||||||
|
// Otherwise, this will define the string for you.
|
||||||
fn build_string_table(
|
fn build_string_table(
|
||||||
&mut self,
|
&mut self,
|
||||||
func: &mut Function,
|
func: &mut Function,
|
||||||
@@ -149,30 +227,73 @@ impl<M: Module> Backend<M> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ValueOrRef {
|
impl Expression {
|
||||||
fn into_cranelift(
|
fn into_crane(
|
||||||
self,
|
self,
|
||||||
builder: &mut FunctionBuilder,
|
builder: &mut FunctionBuilder,
|
||||||
local_variables: &HashMap<ArcIntern<String>, usize>,
|
local_variables: &HashMap<ArcIntern<String>, Variable>,
|
||||||
global_variables: &HashMap<String, GlobalValue>,
|
global_variables: &HashMap<String, GlobalValue>,
|
||||||
) -> Result<entities::Value, ModuleError> {
|
) -> Result<entities::Value, BackendError> {
|
||||||
match self {
|
match self {
|
||||||
ValueOrRef::Value(_, value) => match value {
|
// Values are pretty straightforward to compile, mostly because we only
|
||||||
Value::Number(_base, numval) => Ok(builder.ins().iconst(types::I64, numval)),
|
// have one type of variable, and it's an integer type.
|
||||||
},
|
Expression::Value(_, Value::Number(_, v)) => Ok(builder.ins().iconst(types::I64, v)),
|
||||||
|
|
||||||
ValueOrRef::Ref(_, name) => {
|
Expression::Reference(_, name) => {
|
||||||
if let Some(local_num) = local_variables.get(&name) {
|
// first we see if this is a local variable (which is nicer, from an
|
||||||
return Ok(builder.use_var(Variable::new(*local_num)));
|
// optimization point of view.)
|
||||||
|
if let Some(local_var) = local_variables.get(&name) {
|
||||||
|
return Ok(builder.use_var(*local_var));
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(global_id) = global_variables.get(name.as_str()) {
|
// then we check to see if this is a global reference, which requires us to
|
||||||
let val_ptr = builder.ins().symbol_value(types::I64, *global_id);
|
// first lookup where the value is stored, and then load it.
|
||||||
|
if let Some(global_var) = global_variables.get(name.as_ref()) {
|
||||||
|
let val_ptr = builder.ins().symbol_value(types::I64, *global_var);
|
||||||
return Ok(builder.ins().load(types::I64, MemFlags::new(), val_ptr, 0));
|
return Ok(builder.ins().load(types::I64, MemFlags::new(), val_ptr, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
Err(ModuleError::Undeclared(name.to_string()))
|
// this should never happen, because we should have made sure that there are
|
||||||
|
// no unbound variables a long time before this. but still ...
|
||||||
|
Err(BackendError::VariableLookupFailure(name))
|
||||||
|
}
|
||||||
|
|
||||||
|
Expression::Primitive(_, prim, mut vals) => {
|
||||||
|
// we're going to use `pop`, so we're going to pull and compile the right value ...
|
||||||
|
let right =
|
||||||
|
vals.pop()
|
||||||
|
.unwrap()
|
||||||
|
.into_crane(builder, local_variables, global_variables)?;
|
||||||
|
// ... and then the left.
|
||||||
|
let left =
|
||||||
|
vals.pop()
|
||||||
|
.unwrap()
|
||||||
|
.into_crane(builder, local_variables, global_variables)?;
|
||||||
|
|
||||||
|
// then we just need to tell Cranelift how to do each of our primitives! Much
|
||||||
|
// like Statements, above, we probably want to eventually shuffle this off into
|
||||||
|
// a separate function (maybe something off `Primitive`), but for now it's simple
|
||||||
|
// enough that we just do the `match` here.
|
||||||
|
match prim {
|
||||||
|
Primitive::Plus => Ok(builder.ins().iadd(left, right)),
|
||||||
|
Primitive::Minus => Ok(builder.ins().isub(left, right)),
|
||||||
|
Primitive::Times => Ok(builder.ins().imul(left, right)),
|
||||||
|
Primitive::Divide => Ok(builder.ins().sdiv(left, right)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Just to avoid duplication, this just leverages the `From<ValueOrRef>` trait implementation
|
||||||
|
// for `Expression` to compile this via the `Expression` logic, above.
|
||||||
|
impl ValueOrRef {
|
||||||
|
fn into_crane(
|
||||||
|
self,
|
||||||
|
builder: &mut FunctionBuilder,
|
||||||
|
local_variables: &HashMap<ArcIntern<String>, Variable>,
|
||||||
|
global_variables: &HashMap<String, GlobalValue>,
|
||||||
|
) -> Result<entities::Value, BackendError> {
|
||||||
|
Expression::from(self).into_crane(builder, local_variables, global_variables)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -8,9 +8,14 @@ use std::fmt::Write;
|
|||||||
use target_lexicon::Triple;
|
use target_lexicon::Triple;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
|
/// An object for querying / using functions built into the runtime.
|
||||||
|
///
|
||||||
|
/// Right now, this is quite a bit of boilerplate for very nebulous
|
||||||
|
/// value. However, as the number of built-in functions gets large, it's
|
||||||
|
/// nice to have a single point to register and query them, so here we
|
||||||
|
/// go.
|
||||||
pub struct RuntimeFunctions {
|
pub struct RuntimeFunctions {
|
||||||
builtin_functions: HashMap<String, FuncId>,
|
builtin_functions: HashMap<String, FuncId>,
|
||||||
_referenced_functions: Vec<String>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error, PartialEq)]
|
#[derive(Debug, Error, PartialEq)]
|
||||||
@@ -19,25 +24,27 @@ pub enum RuntimeFunctionError {
|
|||||||
CannotFindRuntimeFunction(String),
|
CannotFindRuntimeFunction(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" fn runtime_print(output_buffer: *mut String, name: *const i8, value: i64) {
|
|
||||||
let cstr = unsafe { CStr::from_ptr(name) };
|
|
||||||
let reconstituted = cstr.to_string_lossy();
|
|
||||||
|
|
||||||
if let Some(output_buffer) = unsafe { output_buffer.as_mut() } {
|
|
||||||
writeln!(output_buffer, "{} = {}i64", reconstituted, value).unwrap();
|
|
||||||
} else {
|
|
||||||
println!("{} = {}", reconstituted, value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl RuntimeFunctions {
|
impl RuntimeFunctions {
|
||||||
|
/// Generate a new runtime function table for the given platform, and
|
||||||
|
/// declare them within the provided Cranelift module.
|
||||||
|
///
|
||||||
|
/// Note that this is very conservative: it assumes that your module
|
||||||
|
/// will want to use every runtime function. Unless the Cranelift object
|
||||||
|
/// builder is smart, this might inject a bunch of references (and thus
|
||||||
|
/// linker requirements) that aren't actually needed by your program.
|
||||||
|
///
|
||||||
|
/// Then again, right now there's exactly one runtime function, so ...
|
||||||
|
/// not a big deal.
|
||||||
pub fn new<M: Module>(platform: &Triple, module: &mut M) -> ModuleResult<RuntimeFunctions> {
|
pub fn new<M: Module>(platform: &Triple, module: &mut M) -> ModuleResult<RuntimeFunctions> {
|
||||||
let mut builtin_functions = HashMap::new();
|
let mut builtin_functions = HashMap::new();
|
||||||
let _referenced_functions = Vec::new();
|
|
||||||
|
|
||||||
let string_param = AbiParam::new(types::I64);
|
let string_param = AbiParam::new(types::I64);
|
||||||
let int64_param = AbiParam::new(types::I64);
|
let int64_param = AbiParam::new(types::I64);
|
||||||
|
|
||||||
|
// declare print for Cranelift; it's something we're going to import
|
||||||
|
// into the current module (it's compiled separately), and takes two
|
||||||
|
// strings and an integer. (Which ... turn out to all be the same
|
||||||
|
// underlying type, which is weird but the way it is.)
|
||||||
let print_id = module.declare_function(
|
let print_id = module.declare_function(
|
||||||
"print",
|
"print",
|
||||||
Linkage::Import,
|
Linkage::Import,
|
||||||
@@ -47,14 +54,19 @@ impl RuntimeFunctions {
|
|||||||
call_conv: CallConv::triple_default(platform),
|
call_conv: CallConv::triple_default(platform),
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// Toss this function in our internal dictionary, as well.
|
||||||
builtin_functions.insert("print".to_string(), print_id);
|
builtin_functions.insert("print".to_string(), print_id);
|
||||||
|
|
||||||
Ok(RuntimeFunctions {
|
Ok(RuntimeFunctions { builtin_functions })
|
||||||
builtin_functions,
|
|
||||||
_referenced_functions,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Include the named runtime function into the current Function context.
|
||||||
|
///
|
||||||
|
/// This is necessary for every runtime function reference within each
|
||||||
|
/// function. The returned `FuncRef` can be used in `call` invocations.
|
||||||
|
/// The only reason for this function to error is if you pass a name that
|
||||||
|
/// the runtime isn't familiar with.
|
||||||
pub fn include_runtime_function<M: Module>(
|
pub fn include_runtime_function<M: Module>(
|
||||||
&self,
|
&self,
|
||||||
name: &str,
|
name: &str,
|
||||||
@@ -69,7 +81,30 @@ impl RuntimeFunctions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Register live, local versions of the runtime functions into the JIT.
|
||||||
|
///
|
||||||
|
/// Note that these implementations are *not* the same as the ones defined
|
||||||
|
/// in `CARGO_MANIFEST_DIR/runtime/`, for ... reasons. It might be a good
|
||||||
|
/// change, in the future, to find a way to unify these implementations into
|
||||||
|
/// one; both to reduce the chance that they deviate, and to reduce overall
|
||||||
|
/// maintenance burden.
|
||||||
pub fn register_jit_implementations(builder: &mut JITBuilder) {
|
pub fn register_jit_implementations(builder: &mut JITBuilder) {
|
||||||
builder.symbol("print", runtime_print as *const u8);
|
builder.symbol("print", runtime_print as *const u8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print! This implementation is used in the JIT compiler, to actually print data. We
|
||||||
|
// use the `output_buffer` argument as an aid for testing; if it's non-NULL, it's a string
|
||||||
|
// we extend with the output, so that multiple JIT'd `Program`s can run concurrently
|
||||||
|
// without stomping over each other's output. If `output_buffer` is NULL, we just print
|
||||||
|
// to stdout.
|
||||||
|
extern "C" fn runtime_print(output_buffer: *mut String, name: *const i8, value: i64) {
|
||||||
|
let cstr = unsafe { CStr::from_ptr(name) };
|
||||||
|
let reconstituted = cstr.to_string_lossy();
|
||||||
|
|
||||||
|
if let Some(output_buffer) = unsafe { output_buffer.as_mut() } {
|
||||||
|
writeln!(output_buffer, "{} = {}i64", reconstituted, value).unwrap();
|
||||||
|
} else {
|
||||||
|
println!("{} = {}", reconstituted, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,17 +1,7 @@
|
|||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use codespan_reporting::diagnostic::Diagnostic;
|
|
||||||
use codespan_reporting::files::SimpleFiles;
|
|
||||||
use codespan_reporting::term;
|
|
||||||
use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};
|
|
||||||
use cranelift_object::object;
|
|
||||||
|
|
||||||
use ngr::backend::Backend;
|
|
||||||
use ngr::backend::BackendError;
|
|
||||||
use ngr::ir::Program as IR;
|
|
||||||
use ngr::syntax::{ParserError, Program as Syntax};
|
|
||||||
use target_lexicon::Triple;
|
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
|
/// Clap is great! Even though we don't have many command line arguments
|
||||||
|
/// yet, this is just really neat.
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[clap(author, version, about, long_about = None)]
|
#[clap(author, version, about, long_about = None)]
|
||||||
struct CommandLineArguments {
|
struct CommandLineArguments {
|
||||||
@@ -23,76 +13,14 @@ struct CommandLineArguments {
|
|||||||
file: String,
|
file: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
|
||||||
enum MainError {
|
|
||||||
#[error(transparent)]
|
|
||||||
Backend(#[from] BackendError),
|
|
||||||
#[error("Parser error")]
|
|
||||||
ParserError(#[from] ParserError),
|
|
||||||
#[error("IO error")]
|
|
||||||
IoError(#[from] std::io::Error),
|
|
||||||
#[error("write error")]
|
|
||||||
WriteError(#[from] object::write::Error),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<MainError> for Diagnostic<usize> {
|
|
||||||
fn from(value: MainError) -> Self {
|
|
||||||
match value {
|
|
||||||
MainError::Backend(be) => be.into(),
|
|
||||||
MainError::ParserError(pe) => (&pe).into(),
|
|
||||||
MainError::IoError(e) => Diagnostic::error().with_message(format!("IO error: {}", e)),
|
|
||||||
MainError::WriteError(e) => {
|
|
||||||
Diagnostic::error().with_message(format!("Module write error: {}", e))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn compile(file_database: &mut SimpleFiles<String, String>) -> Result<(), MainError> {
|
|
||||||
let args = CommandLineArguments::parse();
|
|
||||||
|
|
||||||
let syntax = Syntax::parse_file(file_database, &args.file)?;
|
|
||||||
let (mut errors, mut warnings) = syntax.validate();
|
|
||||||
let stop = !errors.is_empty();
|
|
||||||
let messages = errors
|
|
||||||
.drain(..)
|
|
||||||
.map(Into::into)
|
|
||||||
.chain(warnings.drain(..).map(Into::into));
|
|
||||||
let writer = StandardStream::stderr(ColorChoice::Auto);
|
|
||||||
let config = codespan_reporting::term::Config::default();
|
|
||||||
|
|
||||||
for message in messages {
|
|
||||||
term::emit(&mut writer.lock(), &config, file_database, &message).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
if stop {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let ir = IR::from(syntax.simplify());
|
|
||||||
let mut backend = Backend::object_file(Triple::host())?;
|
|
||||||
backend.compile_function("gogogo", ir)?;
|
|
||||||
let bytes = backend.bytes()?;
|
|
||||||
std::fs::write(args.output.unwrap_or_else(|| "output.o".to_string()), bytes)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut file_database = SimpleFiles::new();
|
let args = CommandLineArguments::parse();
|
||||||
|
let mut compiler = ngr::Compiler::default();
|
||||||
|
|
||||||
match compile(&mut file_database) {
|
let output_file = args.output.unwrap_or("output.o".to_string());
|
||||||
Ok(()) => {}
|
|
||||||
Err(e) => {
|
|
||||||
let writer = StandardStream::stderr(ColorChoice::Auto);
|
|
||||||
let config = codespan_reporting::term::Config::default();
|
|
||||||
|
|
||||||
term::emit(
|
if let Some(bytes) = compiler.compile(&args.file) {
|
||||||
&mut writer.lock(),
|
std::fs::write(&output_file, bytes)
|
||||||
&config,
|
.unwrap_or_else(|x| eprintln!("Could not write to file {}: {}", output_file, x));
|
||||||
&file_database,
|
|
||||||
&Diagnostic::from(e),
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
135
src/bin/ngri.rs
135
src/bin/ngri.rs
@@ -1,130 +1,11 @@
|
|||||||
use codespan_reporting::diagnostic::Diagnostic;
|
use ngr::backend::BackendError;
|
||||||
use codespan_reporting::files::SimpleFiles;
|
|
||||||
use codespan_reporting::term::{self, Config};
|
|
||||||
use cranelift_jit::JITModule;
|
|
||||||
use cranelift_module::ModuleError;
|
|
||||||
use ngr::backend::{Backend, BackendError};
|
|
||||||
use ngr::ir::Program as IR;
|
|
||||||
use ngr::syntax::{Location, ParserError, Statement};
|
|
||||||
use pretty::termcolor::{ColorChoice, StandardStream, WriteColor};
|
|
||||||
use rustyline::error::ReadlineError;
|
use rustyline::error::ReadlineError;
|
||||||
use rustyline::DefaultEditor;
|
use rustyline::DefaultEditor;
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
pub struct RunLoop<'a> {
|
|
||||||
file_database: SimpleFiles<&'a str, String>,
|
|
||||||
jitter: Backend<JITModule>,
|
|
||||||
variable_binding_sites: HashMap<String, Location>,
|
|
||||||
gensym_index: usize,
|
|
||||||
writer: &'a mut dyn WriteColor,
|
|
||||||
config: Config,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[allow(clippy::upper_case_acronyms)]
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
|
||||||
enum REPLError {
|
|
||||||
#[error("Error parsing statement: {0}")]
|
|
||||||
Parser(#[from] ParserError),
|
|
||||||
#[error("JIT error: {0}")]
|
|
||||||
JIT(#[from] BackendError),
|
|
||||||
#[error("Internal cranelift error: {0}")]
|
|
||||||
Cranelift(#[from] ModuleError),
|
|
||||||
#[error(transparent)]
|
|
||||||
Reporting(#[from] codespan_reporting::files::Error),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<REPLError> for Diagnostic<usize> {
|
|
||||||
fn from(value: REPLError) -> Self {
|
|
||||||
match value {
|
|
||||||
REPLError::Parser(err) => Diagnostic::from(&err),
|
|
||||||
REPLError::JIT(err) => Diagnostic::from(err),
|
|
||||||
REPLError::Cranelift(err) => Diagnostic::bug().with_message(format!("{}", err)),
|
|
||||||
REPLError::Reporting(err) => Diagnostic::bug().with_message(format!("{}", err)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> RunLoop<'a> {
|
|
||||||
pub fn new(writer: &'a mut dyn WriteColor, config: Config) -> Result<Self, BackendError> {
|
|
||||||
Ok(RunLoop {
|
|
||||||
file_database: SimpleFiles::new(),
|
|
||||||
jitter: Backend::jit(None)?,
|
|
||||||
variable_binding_sites: HashMap::new(),
|
|
||||||
gensym_index: 1,
|
|
||||||
writer,
|
|
||||||
config,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn emit_diagnostic(
|
|
||||||
&mut self,
|
|
||||||
diagnostic: Diagnostic<usize>,
|
|
||||||
) -> Result<(), codespan_reporting::files::Error> {
|
|
||||||
term::emit(self.writer, &self.config, &self.file_database, &diagnostic)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn process_input(&mut self, line_no: usize, command: String) {
|
|
||||||
if let Err(err) = self.process(line_no, command) {
|
|
||||||
if let Err(e) = self.emit_diagnostic(Diagnostic::from(err)) {
|
|
||||||
eprintln!(
|
|
||||||
"WOAH! System having trouble printing error messages. This is very bad. ({})",
|
|
||||||
e
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn process(&mut self, line_no: usize, command: String) -> Result<(), REPLError> {
|
|
||||||
let entry = self.file_database.add("entry", command);
|
|
||||||
let source = self
|
|
||||||
.file_database
|
|
||||||
.get(entry)
|
|
||||||
.expect("entry exists")
|
|
||||||
.source();
|
|
||||||
let syntax = Statement::parse(entry, source)?;
|
|
||||||
|
|
||||||
// if this is a variable binding, and we've never defined this variable before,
|
|
||||||
// we should tell cranelift about it. this is optimistic; if we fail to compile,
|
|
||||||
// then we won't use this definition until someone tries again.
|
|
||||||
if let Statement::Binding(_, ref name, _) = syntax {
|
|
||||||
if !self.variable_binding_sites.contains_key(name.as_str()) {
|
|
||||||
self.jitter.define_string(name)?;
|
|
||||||
self.jitter.define_variable(name.clone())?;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let (mut errors, mut warnings) = syntax.validate(&mut self.variable_binding_sites);
|
|
||||||
let stop = !errors.is_empty();
|
|
||||||
let messages = errors
|
|
||||||
.drain(..)
|
|
||||||
.map(Into::into)
|
|
||||||
.chain(warnings.drain(..).map(Into::into));
|
|
||||||
|
|
||||||
for message in messages {
|
|
||||||
self.emit_diagnostic(message)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
if stop {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let ir = IR::from(syntax.simplify(&mut self.gensym_index));
|
|
||||||
let name = format!("line{}", line_no);
|
|
||||||
let function_id = self.jitter.compile_function(&name, ir)?;
|
|
||||||
self.jitter.module.finalize_definitions()?;
|
|
||||||
let compiled_bytes = self.jitter.bytes(function_id);
|
|
||||||
let compiled_function = unsafe { std::mem::transmute::<_, fn() -> ()>(compiled_bytes) };
|
|
||||||
compiled_function();
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() -> Result<(), BackendError> {
|
fn main() -> Result<(), BackendError> {
|
||||||
let mut editor = DefaultEditor::new().expect("rustyline works");
|
let mut editor = DefaultEditor::new().expect("rustyline works");
|
||||||
let mut line_no = 0;
|
let mut line_no = 0;
|
||||||
let mut writer = StandardStream::stdout(ColorChoice::Auto);
|
let mut state = ngr::REPL::default();
|
||||||
let config = codespan_reporting::term::Config::default();
|
|
||||||
let mut state = RunLoop::new(&mut writer, config)?;
|
|
||||||
|
|
||||||
println!("No Good Reason, the Interpreter!");
|
println!("No Good Reason, the Interpreter!");
|
||||||
loop {
|
loop {
|
||||||
@@ -135,18 +16,30 @@ fn main() -> Result<(), BackendError> {
|
|||||||
":quit" => break,
|
":quit" => break,
|
||||||
_ => state.process_input(line_no, command),
|
_ => state.process_input(line_no, command),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// it's not clear to me what this could be, but OK
|
||||||
Err(ReadlineError::Io(e)) => {
|
Err(ReadlineError::Io(e)) => {
|
||||||
eprintln!("IO error: {}", e);
|
eprintln!("IO error: {}", e);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Control-D and Control-C
|
||||||
Err(ReadlineError::Eof) => break,
|
Err(ReadlineError::Eof) => break,
|
||||||
Err(ReadlineError::Interrupted) => break,
|
Err(ReadlineError::Interrupted) => break,
|
||||||
|
|
||||||
|
// For some reason this doesn't exist on Windows. I also don't quite know
|
||||||
|
// what would cause this, but ...
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
Err(ReadlineError::Errno(e)) => {
|
Err(ReadlineError::Errno(e)) => {
|
||||||
eprintln!("Unknown syscall error: {}", e);
|
eprintln!("Unknown syscall error: {}", e);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We don't actually do any reflow-ing if we change the terminal size,
|
||||||
|
// so we can just ignore this.
|
||||||
Err(ReadlineError::WindowResized) => continue,
|
Err(ReadlineError::WindowResized) => continue,
|
||||||
|
|
||||||
|
// Why on earth are there so many error types?
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
eprintln!("Unknown internal error: {}", e);
|
eprintln!("Unknown internal error: {}", e);
|
||||||
break;
|
break;
|
||||||
|
|||||||
157
src/compiler.rs
Normal file
157
src/compiler.rs
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
use crate::backend::Backend;
|
||||||
|
use crate::ir::Program as IR;
|
||||||
|
use crate::syntax::Program as Syntax;
|
||||||
|
use codespan_reporting::{
|
||||||
|
diagnostic::Diagnostic,
|
||||||
|
files::SimpleFiles,
|
||||||
|
term::{self, Config},
|
||||||
|
};
|
||||||
|
use pretty::termcolor::{ColorChoice, StandardStream};
|
||||||
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
|
/// A high-level compiler for NGR programs.
|
||||||
|
///
|
||||||
|
/// This object can be built once, and then re-used many times to build multiple
|
||||||
|
/// files. For most users, the [`Default`] implementation should be sufficient;
|
||||||
|
/// it will use `stderr` for warnings and errors, with default colors based on
|
||||||
|
/// what we discover from the terminal. For those who want to provide alternate
|
||||||
|
/// outputs, though, the `Compiler::new` constructor is available.
|
||||||
|
pub struct Compiler {
|
||||||
|
file_database: SimpleFiles<String, String>,
|
||||||
|
console: StandardStream,
|
||||||
|
console_config: Config,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Compiler {
|
||||||
|
fn default() -> Self {
|
||||||
|
let console = StandardStream::stderr(ColorChoice::Auto);
|
||||||
|
Compiler::new(console, Config::default())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Compiler {
|
||||||
|
/// Create a new compiler object.
|
||||||
|
///
|
||||||
|
/// This object can be re-used to compile as many files as you like.
|
||||||
|
/// Use this function if you want to configure your output console and/or
|
||||||
|
/// its configuration in some custom way. Alternatively, you can use the
|
||||||
|
/// `Default` implementation, which will emit information to `stderr` with
|
||||||
|
/// a reasonable default configuration.
|
||||||
|
pub fn new(console: StandardStream, console_config: Config) -> Self {
|
||||||
|
Compiler {
|
||||||
|
file_database: SimpleFiles::new(),
|
||||||
|
console,
|
||||||
|
console_config,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compile the given file, returning the object file as a vector of bytes.
|
||||||
|
///
|
||||||
|
/// This function may create output, via the console configured with this
|
||||||
|
/// `Compiler` object. If the compilation fails for any reason, will return
|
||||||
|
/// `None`.
|
||||||
|
pub fn compile<P: AsRef<str>>(&mut self, input_file: P) -> Option<Vec<u8>> {
|
||||||
|
match self.compile_internal(input_file.as_ref()) {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(e) => {
|
||||||
|
self.emit(e.into());
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is the actual meat of the compilation chain; we hide it from the user
|
||||||
|
/// because the type is kind of unpleasant.
|
||||||
|
///
|
||||||
|
/// The weird error type comes from the fact that we can run into three types
|
||||||
|
/// of result:
|
||||||
|
///
|
||||||
|
/// * Fundamental errors, like an incorrectly formatted file or some
|
||||||
|
/// oddity with IO. These return `Err`.
|
||||||
|
/// * Validation errors, where we reject the program due to something
|
||||||
|
/// semantically wrong with it. These return `Ok(None)`.
|
||||||
|
/// * Success! In this case, we return `Ok(Some(...))`, where the bytes
|
||||||
|
/// returned is the contents of the compiled object file.
|
||||||
|
///
|
||||||
|
fn compile_internal(&mut self, input_file: &str) -> Result<Option<Vec<u8>>, CompilerError> {
|
||||||
|
// Try to parse the file into our syntax AST. If we fail, the `?` below
|
||||||
|
// propagates the error to `compile`, which emits it and returns `None`.
|
||||||
|
let syntax = Syntax::parse_file(&mut self.file_database, input_file)?;
|
||||||
|
|
||||||
|
// Now validate the user's syntax AST. This can possibly find errors and/or
|
||||||
|
// create warnings. We can continue if we only get warnings, but need to stop
|
||||||
|
// if we get any errors.
|
||||||
|
let (mut errors, mut warnings) = syntax.validate();
|
||||||
|
let stop = !errors.is_empty();
|
||||||
|
let messages = errors
|
||||||
|
.drain(..)
|
||||||
|
.map(Into::into)
|
||||||
|
.chain(warnings.drain(..).map(Into::into));
|
||||||
|
|
||||||
|
// emit all the messages we receive; warnings *and* errors
|
||||||
|
for message in messages {
|
||||||
|
self.emit(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
// we got errors, so just stop right now. perhaps oddly, this is Ok(None);
|
||||||
|
// we've already said all we're going to say in the messages above, so there's
|
||||||
|
// no need to provide another `Err` result.
|
||||||
|
if stop {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now that we've validated it, turn it into IR.
|
||||||
|
let ir = IR::from(syntax);
|
||||||
|
|
||||||
|
// Finally, send all this to Cranelift for conversion into an object file.
|
||||||
|
let mut backend = Backend::object_file(Triple::host())?;
|
||||||
|
backend.compile_function("gogogo", ir)?;
|
||||||
|
Ok(Some(backend.bytes()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit a diagnostic.
|
||||||
|
///
|
||||||
|
/// This is just a really handy shorthand we use elsewhere in the object, because
|
||||||
|
/// there's a lot of boilerplate we'd like to skip.
|
||||||
|
fn emit(&mut self, diagnostic: Diagnostic<usize>) {
|
||||||
|
term::emit(
|
||||||
|
&mut self.console.lock(),
|
||||||
|
&self.console_config,
|
||||||
|
&self.file_database,
|
||||||
|
&diagnostic,
|
||||||
|
)
|
||||||
|
.expect("codespan reporting term::emit works");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is just a handy type that we can convert things into; it's not
|
||||||
|
// exposed outside this module, and doesn't actually do much of interest.
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
enum CompilerError {
|
||||||
|
#[error(transparent)]
|
||||||
|
Backend(#[from] crate::backend::BackendError),
|
||||||
|
#[error(transparent)]
|
||||||
|
ParserError(#[from] crate::syntax::ParserError),
|
||||||
|
#[error(transparent)]
|
||||||
|
IoError(#[from] std::io::Error),
|
||||||
|
#[error(transparent)]
|
||||||
|
WriteError(#[from] cranelift_object::object::write::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since we're going to use codespan to report pretty much all errors,
|
||||||
|
// this just passes through most of the errors, or makes simple versions
|
||||||
|
// of `Diagnostic` for those that we don't have existing `From`s.
|
||||||
|
impl From<CompilerError> for Diagnostic<usize> {
|
||||||
|
fn from(value: CompilerError) -> Self {
|
||||||
|
match value {
|
||||||
|
CompilerError::Backend(be) => be.into(),
|
||||||
|
CompilerError::ParserError(pe) => (&pe).into(),
|
||||||
|
CompilerError::IoError(e) => {
|
||||||
|
Diagnostic::error().with_message(format!("IO error: {}", e))
|
||||||
|
}
|
||||||
|
CompilerError::WriteError(e) => {
|
||||||
|
Diagnostic::error().with_message(format!("Module write error: {}", e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
73
src/eval.rs
73
src/eval.rs
@@ -1,3 +1,38 @@
|
|||||||
|
//! Helpful functions for evaluating NGR programs.
|
||||||
|
//!
|
||||||
|
//! Look, this is a compiler, and so you might be asking why it has a bunch of
|
||||||
|
//! stuff in it to help with writing interpreters. Well, the answer is simple:
|
||||||
|
//! testing. It's really nice to know that if you start with a program that
|
||||||
|
//! does a thing, and then you muck with it, you end up with a program that does
|
||||||
|
//! the exact same thing. If you talk to people who think about language
|
||||||
|
//! semantics, they'll call this "observational equivalence": maybe the two
|
||||||
|
//! programs don't do 100% the same things in the same order, but you shouldn't
|
||||||
|
//! be able to observe the difference ... at least, not without a stopwatch,
|
||||||
|
//! memory profilers, etc.
|
||||||
|
//!
|
||||||
|
//! The actual evaluators for our various syntaxes are hidden in `eval` functions
|
||||||
|
//! of the various ASTs. It's nice to have them "next to" the syntax that way, so
|
||||||
|
//! that we just edit stuff in one part of the source tree at a time. This module,
|
||||||
|
//! then, just contains some things that are generally helpful across all the
|
||||||
|
//! interpreters we've written.
|
||||||
|
//!
|
||||||
|
//! In particular, this module helps with:
|
||||||
|
//!
|
||||||
|
//! * Defining a common error type -- [`EvalError`] -- that we can reasonably
|
||||||
|
//! compare. It's nice to compare errors, here, because we want to know that
|
||||||
|
//! if a program used to fail, it will still fail after we change it, and
|
||||||
|
//! fail in the exact same way.
|
||||||
|
//! * Defining a notion of a binding environment: [`EvalEnvironment`]. This
|
||||||
|
//! will help us keep track of variables bound in our program, as we run it.
|
||||||
|
//! * Defining a notion of a runtime value: [`Value`]. Yes, this is the
|
||||||
|
//! umpteenth time that we're re-defining basically the same enumeration
|
||||||
|
//! with exactly the same name, but it's nice to have it separated so that
|
||||||
|
//! we don't confuse them.
|
||||||
|
//! * Finally, this module implements all of our primitive functions, as the
|
||||||
|
//! [`Value::calculate`] function. This is just a nice abstraction boundary,
|
||||||
|
//! because the implementation of some parts of these primitives is really
|
||||||
|
//! awful to look at.
|
||||||
|
//!
|
||||||
mod env;
|
mod env;
|
||||||
mod primop;
|
mod primop;
|
||||||
mod value;
|
mod value;
|
||||||
@@ -9,6 +44,13 @@ pub use value::Value;
|
|||||||
|
|
||||||
use crate::backend::BackendError;
|
use crate::backend::BackendError;
|
||||||
|
|
||||||
|
/// All of the errors that can happen trying to evaluate an NGR program.
|
||||||
|
///
|
||||||
|
/// This is yet another standard [`thiserror::Error`] type, but with the
|
||||||
|
/// caveat that it implements [`PartialEq`] even though some of its
|
||||||
|
/// constituent members don't. It does so through the very sketchy mechanism
|
||||||
|
/// of converting those errors to strings and then seeing if they're the
|
||||||
|
/// same.
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
pub enum EvalError {
|
pub enum EvalError {
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
@@ -18,15 +60,15 @@ pub enum EvalError {
|
|||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
Backend(#[from] BackendError),
|
Backend(#[from] BackendError),
|
||||||
#[error("IO error: {0}")]
|
#[error("IO error: {0}")]
|
||||||
IO(String),
|
IO(#[from] std::io::Error),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
Module(#[from] ModuleError),
|
Module(#[from] ModuleError),
|
||||||
}
|
#[error("Linker error: {0}")]
|
||||||
|
Linker(String),
|
||||||
impl From<std::io::Error> for EvalError {
|
#[error("Program exitted with status {0}")]
|
||||||
fn from(value: std::io::Error) -> Self {
|
ExitCode(std::process::ExitStatus),
|
||||||
EvalError::IO(value.to_string())
|
#[error("Unexpected output at runtime: {0}")]
|
||||||
}
|
RuntimeOutput(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialEq for EvalError {
|
impl PartialEq for EvalError {
|
||||||
@@ -48,7 +90,7 @@ impl PartialEq for EvalError {
|
|||||||
},
|
},
|
||||||
|
|
||||||
EvalError::IO(a) => match other {
|
EvalError::IO(a) => match other {
|
||||||
EvalError::IO(b) => a == b,
|
EvalError::IO(b) => a.to_string() == b.to_string(),
|
||||||
_ => false,
|
_ => false,
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -56,6 +98,21 @@ impl PartialEq for EvalError {
|
|||||||
EvalError::Module(b) => a.to_string() == b.to_string(),
|
EvalError::Module(b) => a.to_string() == b.to_string(),
|
||||||
_ => false,
|
_ => false,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
EvalError::Linker(a) => match other {
|
||||||
|
EvalError::Linker(b) => a == b,
|
||||||
|
_ => false,
|
||||||
|
},
|
||||||
|
|
||||||
|
EvalError::ExitCode(a) => match other {
|
||||||
|
EvalError::ExitCode(b) => a == b,
|
||||||
|
_ => false,
|
||||||
|
},
|
||||||
|
|
||||||
|
EvalError::RuntimeOutput(a) => match other {
|
||||||
|
EvalError::RuntimeOutput(b) => a == b,
|
||||||
|
_ => false,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,15 +2,28 @@ use crate::eval::Value;
|
|||||||
use internment::ArcIntern;
|
use internment::ArcIntern;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
/// An evaluation environment, which maps variable names to their
|
||||||
|
/// current values.
|
||||||
|
///
|
||||||
|
/// One key difference between `EvalEnvironment` and `HashMap` is that
|
||||||
|
/// `EvalEnvironment` uses an `extend` mechanism to add keys, rather
|
||||||
|
/// than an `insert`. This difference allows you to add mappings for
|
||||||
|
/// a subcomputation while still retaining the old version without those
|
||||||
|
/// keys, which is really handy for implementing variable scoping.
|
||||||
pub struct EvalEnvironment {
|
pub struct EvalEnvironment {
|
||||||
inner: Arc<EvalEnvInternal>,
|
inner: Arc<EvalEnvInternal>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum EvalEnvInternal {
|
enum EvalEnvInternal {
|
||||||
Empty,
|
Empty,
|
||||||
Value(ArcIntern<String>, Value, Arc<EvalEnvInternal>),
|
Value(ArcIntern<String>, Value, Arc<EvalEnvInternal>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Errors that can happen when looking up a variable.
|
||||||
|
///
|
||||||
|
/// This enumeration may be extended in the future, depending on if we
|
||||||
|
/// get more subtle with our keys. But for now, this is just a handy
|
||||||
|
/// way to make lookup failures be `thiserror::Error`s.
|
||||||
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
|
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
|
||||||
pub enum LookupError {
|
pub enum LookupError {
|
||||||
#[error("Could not find variable '{0}' in environment")]
|
#[error("Could not find variable '{0}' in environment")]
|
||||||
@@ -24,28 +37,38 @@ impl Default for EvalEnvironment {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl EvalEnvironment {
|
impl EvalEnvironment {
|
||||||
|
/// Create a new, empty environment.
|
||||||
pub fn empty() -> Self {
|
pub fn empty() -> Self {
|
||||||
EvalEnvironment {
|
EvalEnvironment {
|
||||||
inner: Arc::new(EvalEnvInternal::Empty),
|
inner: Arc::new(EvalEnvInternal::Empty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Extend the environment with a new mapping.
|
||||||
|
///
|
||||||
|
/// Note the types: the result of this method is a new `EvalEnvironment`,
|
||||||
|
/// with its own lifetime, and the original environment is left unmodified.
|
||||||
pub fn extend(&self, name: ArcIntern<String>, value: Value) -> Self {
|
pub fn extend(&self, name: ArcIntern<String>, value: Value) -> Self {
|
||||||
EvalEnvironment {
|
EvalEnvironment {
|
||||||
inner: Arc::new(EvalEnvInternal::Value(name, value, self.inner.clone())),
|
inner: Arc::new(EvalEnvInternal::Value(name, value, self.inner.clone())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Look up a variable in the environment, returning an error if it isn't there.
|
||||||
pub fn lookup(&self, n: ArcIntern<String>) -> Result<Value, LookupError> {
|
pub fn lookup(&self, n: ArcIntern<String>) -> Result<Value, LookupError> {
|
||||||
self.inner.lookup(n)
|
self.inner.lookup(n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EvalEnvInternal {
|
impl EvalEnvInternal {
|
||||||
|
/// Look up a variable in the environment, returning an error if it isn't there.
|
||||||
fn lookup(&self, n: ArcIntern<String>) -> Result<Value, LookupError> {
|
fn lookup(&self, n: ArcIntern<String>) -> Result<Value, LookupError> {
|
||||||
match self {
|
match self {
|
||||||
|
// if this is an empty dictionary, never mind, couldn't find it
|
||||||
EvalEnvInternal::Empty => Err(LookupError::CouldNotFind(n)),
|
EvalEnvInternal::Empty => Err(LookupError::CouldNotFind(n)),
|
||||||
|
// is this the key we have right here? if yes, return our value
|
||||||
EvalEnvInternal::Value(name, value, _) if *name == n => Ok(value.clone()),
|
EvalEnvInternal::Value(name, value, _) if *name == n => Ok(value.clone()),
|
||||||
|
// otherwise, recurse up our chain of environments
|
||||||
EvalEnvInternal::Value(_, _, rest) => rest.lookup(n),
|
EvalEnvInternal::Value(_, _, rest) => rest.lookup(n),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -70,6 +93,9 @@ mod tests {
|
|||||||
assert!(tester.lookup(arced("baz")).is_err());
|
assert!(tester.lookup(arced("baz")).is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// added this test to make sure that our nesting property works properly.
|
||||||
|
// it's not a big deal now, but it'll be really handy later when we add any
|
||||||
|
// kind of variable scoping.
|
||||||
#[test]
|
#[test]
|
||||||
fn nested() {
|
fn nested() {
|
||||||
let tester = EvalEnvironment::default();
|
let tester = EvalEnvironment::default();
|
||||||
|
|||||||
@@ -1,19 +1,39 @@
|
|||||||
use crate::eval::value::Value;
|
use crate::eval::value::Value;
|
||||||
|
|
||||||
|
/// Errors that can occur running primitive operations in the evaluators.
|
||||||
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
|
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
|
||||||
pub enum PrimOpError {
|
pub enum PrimOpError {
|
||||||
#[error("Math error (underflow or overflow) computing {0} operator")]
|
#[error("Math error (underflow or overflow) computing {0} operator")]
|
||||||
MathFailure(&'static str),
|
MathFailure(&'static str),
|
||||||
|
/// This particular variant covers the case in which a primitive
|
||||||
|
/// operator takes two arguments that are supposed to be the same,
|
||||||
|
/// but they differ. (So, like, all the math operators.)
|
||||||
#[error("Type mismatch ({1} vs {2}) computing {0} operator")]
|
#[error("Type mismatch ({1} vs {2}) computing {0} operator")]
|
||||||
TypeMismatch(String, Value, Value),
|
TypeMismatch(String, Value, Value),
|
||||||
|
/// This variant covers when an operator must take a particular
|
||||||
|
/// type, but the user has provided a different one.
|
||||||
#[error("Bad type for operator {0}: {1}")]
|
#[error("Bad type for operator {0}: {1}")]
|
||||||
BadTypeFor(&'static str, Value),
|
BadTypeFor(&'static str, Value),
|
||||||
|
/// Probably obvious from the name, but just to be very clear: this
|
||||||
|
/// happens when you pass three arguments to a two argument operator,
|
||||||
|
/// etc. Technically that's a type error of some sort, but we split
|
||||||
|
/// it out.
|
||||||
#[error("Illegal number of arguments for {0}: {1} arguments found")]
|
#[error("Illegal number of arguments for {0}: {1} arguments found")]
|
||||||
BadArgCount(String, usize),
|
BadArgCount(String, usize),
|
||||||
#[error("Unknown primitive operation {0}")]
|
#[error("Unknown primitive operation {0}")]
|
||||||
UnknownPrimOp(String),
|
UnknownPrimOp(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Implementing primitives in an interpreter like this is *super* tedious,
|
||||||
|
// and the only way to make it even somewhat manageable is to use macros.
|
||||||
|
// This particular macro works for binary operations, and assumes that
|
||||||
|
// you've already worked out that the `calculate` call provided two arguments.
|
||||||
|
//
|
||||||
|
// In those cases, it will run the operations we know about, and error if
|
||||||
|
// it doesn't.
|
||||||
|
//
|
||||||
|
// This macro then needs to be instantiated for every type, which is super
|
||||||
|
// fun.
|
||||||
macro_rules! run_op {
|
macro_rules! run_op {
|
||||||
($op: ident, $left: expr, $right: expr) => {
|
($op: ident, $left: expr, $right: expr) => {
|
||||||
match $op {
|
match $op {
|
||||||
@@ -23,15 +43,15 @@ macro_rules! run_op {
|
|||||||
.map(Into::into),
|
.map(Into::into),
|
||||||
"-" => $left
|
"-" => $left
|
||||||
.checked_sub($right)
|
.checked_sub($right)
|
||||||
.ok_or(PrimOpError::MathFailure("+"))
|
.ok_or(PrimOpError::MathFailure("-"))
|
||||||
.map(Into::into),
|
.map(Into::into),
|
||||||
"*" => $left
|
"*" => $left
|
||||||
.checked_mul($right)
|
.checked_mul($right)
|
||||||
.ok_or(PrimOpError::MathFailure("+"))
|
.ok_or(PrimOpError::MathFailure("*"))
|
||||||
.map(Into::into),
|
.map(Into::into),
|
||||||
"/" => $left
|
"/" => $left
|
||||||
.checked_div($right)
|
.checked_div($right)
|
||||||
.ok_or(PrimOpError::MathFailure("+"))
|
.ok_or(PrimOpError::MathFailure("/"))
|
||||||
.map(Into::into),
|
.map(Into::into),
|
||||||
_ => Err(PrimOpError::UnknownPrimOp($op.to_string())),
|
_ => Err(PrimOpError::UnknownPrimOp($op.to_string())),
|
||||||
}
|
}
|
||||||
@@ -41,6 +61,8 @@ macro_rules! run_op {
|
|||||||
impl Value {
|
impl Value {
|
||||||
fn binary_op(operation: &str, left: &Value, right: &Value) -> Result<Value, PrimOpError> {
|
fn binary_op(operation: &str, left: &Value, right: &Value) -> Result<Value, PrimOpError> {
|
||||||
match left {
|
match left {
|
||||||
|
// for now we only have one type, but in the future this is
|
||||||
|
// going to be very irritating.
|
||||||
Value::I64(x) => match right {
|
Value::I64(x) => match right {
|
||||||
Value::I64(y) => run_op!(operation, x, *y),
|
Value::I64(y) => run_op!(operation, x, *y),
|
||||||
// _ => Err(PrimOpError::TypeMismatch(
|
// _ => Err(PrimOpError::TypeMismatch(
|
||||||
@@ -52,6 +74,14 @@ impl Value {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calculate the result of running the given primitive on the given arguments.
|
||||||
|
///
|
||||||
|
/// This can cause errors in a whole mess of ways, so be careful about your
|
||||||
|
/// inputs. For example, addition only works when the two values have the exact
|
||||||
|
/// same type, so expect an error if you pass values of differing types. In addition, this
|
||||||
|
/// implementation catches and raises an error on overflow or underflow, so
|
||||||
|
/// it's worth being careful to make sure that your inputs won't cause either
|
||||||
|
/// condition.
|
||||||
pub fn calculate(operation: &str, values: Vec<Value>) -> Result<Value, PrimOpError> {
|
pub fn calculate(operation: &str, values: Vec<Value>) -> Result<Value, PrimOpError> {
|
||||||
if values.len() == 2 {
|
if values.len() == 2 {
|
||||||
Value::binary_op(operation, &values[0], &values[1])
|
Value::binary_op(operation, &values[0], &values[1])
|
||||||
|
|||||||
@@ -1,5 +1,10 @@
|
|||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
/// Values in the interpreter.
|
||||||
|
///
|
||||||
|
/// Yes, this is yet another definition of a structure called `Value`, which
|
||||||
|
/// are almost entirely identical. However, it's nice to have them separated
|
||||||
|
/// by type so that we don't mix them up.
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
I64(i64),
|
I64(i64),
|
||||||
|
|||||||
14
src/ir.rs
14
src/ir.rs
@@ -1,3 +1,17 @@
|
|||||||
|
//! The middle of the compiler: analysis, simplification, optimization.
|
||||||
|
//!
|
||||||
|
//! For the moment, this module doesn't do much besides define an intermediate
|
||||||
|
//! representation for NGR programs that is a little easier to work with than
|
||||||
|
//! the structures we've built from the actual user syntax. For example, in the
|
||||||
|
//! IR syntax, function calls are simplified so that all their arguments are
|
||||||
|
//! either variables or constants, which can make reasoning about programs
|
||||||
|
//! (and implicit temporary variables) quite a bit easier.
|
||||||
|
//!
|
||||||
|
//! For the foreseeable future, this module will likely remain mostly empty
|
||||||
|
//! besides definitions, as we'll likely want to focus on just processing /
|
||||||
|
//! validating syntax, and then figuring out how to turn it into Cranelift
|
||||||
|
//! and object code. After that point, however, this will be the module to
|
||||||
|
//! come to for analysis and optimization work.
|
||||||
mod ast;
|
mod ast;
|
||||||
mod eval;
|
mod eval;
|
||||||
mod from_syntax;
|
mod from_syntax;
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
use crate::syntax::Location;
|
||||||
use internment::ArcIntern;
|
use internment::ArcIntern;
|
||||||
use pretty::{DocAllocator, Pretty};
|
use pretty::{DocAllocator, Pretty};
|
||||||
use proptest::{
|
use proptest::{
|
||||||
@@ -5,13 +6,28 @@ use proptest::{
|
|||||||
strategy::{BoxedStrategy, Strategy},
|
strategy::{BoxedStrategy, Strategy},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::syntax::Location;
|
/// We're going to represent variables as interned strings.
|
||||||
|
///
|
||||||
|
/// These should be fast enough for comparison that it's OK, since it's going to end up
|
||||||
|
/// being pretty much the pointer to the string.
|
||||||
type Variable = ArcIntern<String>;
|
type Variable = ArcIntern<String>;
|
||||||
|
|
||||||
|
/// The representation of a program within our IR. For now, this is exactly one file.
|
||||||
|
///
|
||||||
|
/// In addition, for the moment there's not really much of interest to hold here besides
|
||||||
|
/// the list of statements read from the file. Order is important. In the future, you
|
||||||
|
/// could imagine caching analysis information in this structure.
|
||||||
|
///
|
||||||
|
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
|
||||||
|
/// to print the structure whenever possible, especially if you value your or your
|
||||||
|
/// user's time. The latter is useful for testing that conversions of `Program` retain
|
||||||
|
/// their meaning. All `Program`s generated through [`Arbitrary`] are guaranteed to be
|
||||||
|
/// syntactically valid, although they may contain runtime issues like over- or underflow.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Program {
|
pub struct Program {
|
||||||
pub statements: Vec<Statement>,
|
// For now, a program is just a vector of statements. In the future, we'll probably
|
||||||
|
// extend this to include a bunch of other information, but for now: just a list.
|
||||||
|
pub(crate) statements: Vec<Statement>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
|
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
|
||||||
@@ -23,6 +39,8 @@ where
|
|||||||
let mut result = allocator.nil();
|
let mut result = allocator.nil();
|
||||||
|
|
||||||
for stmt in self.statements.iter() {
|
for stmt in self.statements.iter() {
|
||||||
|
// there's probably a better way to do this, rather than constantly
|
||||||
|
// adding to the end, but this works.
|
||||||
result = result
|
result = result
|
||||||
.append(stmt.pretty(allocator))
|
.append(stmt.pretty(allocator))
|
||||||
.append(allocator.text(";"))
|
.append(allocator.text(";"))
|
||||||
@@ -39,11 +57,21 @@ impl Arbitrary for Program {
|
|||||||
|
|
||||||
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
|
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
|
||||||
crate::syntax::Program::arbitrary_with(args)
|
crate::syntax::Program::arbitrary_with(args)
|
||||||
.prop_map(|x| Program::from(x.simplify()))
|
.prop_map(Program::from)
|
||||||
.boxed()
|
.boxed()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The representation of a statement in the language.
|
||||||
|
///
|
||||||
|
/// For now, this is either a binding site (`x = 4`) or a print statement
|
||||||
|
/// (`print x`). Someday, though, more!
|
||||||
|
///
|
||||||
|
/// As with `Program`, this type implements [`Pretty`], which should
|
||||||
|
/// be used to display the structure whenever possible. It does not
|
||||||
|
/// implement [`Arbitrary`], though, mostly because it's slightly
|
||||||
|
/// complicated to do so.
|
||||||
|
///
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Statement {
|
pub enum Statement {
|
||||||
Binding(Location, Variable, Expression),
|
Binding(Location, Variable, Expression),
|
||||||
@@ -71,6 +99,18 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The representation of an expression.
|
||||||
|
///
|
||||||
|
/// Note that expressions, like everything else in this syntax tree,
|
||||||
|
/// support [`Pretty`], and it's strongly encouraged that you use
|
||||||
|
/// that trait/module when printing these structures.
|
||||||
|
///
|
||||||
|
/// Also, Expressions at this point in the compiler are explicitly
|
||||||
|
/// defined so that they are *not* recursive. By this point, if an
|
||||||
|
/// expression requires some other data (like, for example, invoking
|
||||||
|
/// a primitive), any subexpressions have been bound to variables so
|
||||||
|
/// that the referenced data will always either be a constant or a
|
||||||
|
/// variable reference.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Expression {
|
pub enum Expression {
|
||||||
Value(Location, Value),
|
Value(Location, Value),
|
||||||
@@ -107,6 +147,12 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A type representing the primitives allowed in the language.
|
||||||
|
///
|
||||||
|
/// Having this as an enumeration avoids a lot of "this should not happen"
|
||||||
|
/// cases, but might prove to be cumbersome in the future. If that happens,
|
||||||
|
/// this may either become a more hierarchical enumeration, or we'll just
|
||||||
|
/// deal with the "this should not happen" cases.
|
||||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||||
pub enum Primitive {
|
pub enum Primitive {
|
||||||
Plus,
|
Plus,
|
||||||
@@ -144,6 +190,11 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An expression that is always either a value or a reference.
|
||||||
|
///
|
||||||
|
/// This is the type used to guarantee that we don't nest expressions
|
||||||
|
/// at this level. Instead, expressions that take arguments take one
|
||||||
|
/// of these, which can only be a constant or a reference.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum ValueOrRef {
|
pub enum ValueOrRef {
|
||||||
Value(Location, Value),
|
Value(Location, Value),
|
||||||
@@ -163,8 +214,23 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<ValueOrRef> for Expression {
|
||||||
|
fn from(value: ValueOrRef) -> Self {
|
||||||
|
match value {
|
||||||
|
ValueOrRef::Value(loc, val) => Expression::Value(loc, val),
|
||||||
|
ValueOrRef::Ref(loc, var) => Expression::Reference(loc, var),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A constant in the IR.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
|
/// A numerical constant.
|
||||||
|
///
|
||||||
|
/// The optional argument is the base that was used by the user to input
|
||||||
|
/// the number. By retaining it, we can ensure that if we need to print the
|
||||||
|
/// number back out, we can do so in the form that the user entered it.
|
||||||
Number(Option<u8>, i64),
|
Number(Option<u8>, i64),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,10 @@ use crate::ir::{Expression, Program, Statement};
|
|||||||
use super::{Primitive, ValueOrRef};
|
use super::{Primitive, ValueOrRef};
|
||||||
|
|
||||||
impl Program {
|
impl Program {
|
||||||
|
/// Evaluate the program, returning either an error or a string containing everything
|
||||||
|
/// the program printed out.
|
||||||
|
///
|
||||||
|
/// The print outs will be newline separated, with one print out per line.
|
||||||
pub fn eval(&self) -> Result<String, EvalError> {
|
pub fn eval(&self) -> Result<String, EvalError> {
|
||||||
let mut env = EvalEnvironment::empty();
|
let mut env = EvalEnvironment::empty();
|
||||||
let mut stdout = String::new();
|
let mut stdout = String::new();
|
||||||
@@ -39,6 +43,9 @@ impl Expression {
|
|||||||
Expression::Primitive(_, op, args) => {
|
Expression::Primitive(_, op, args) => {
|
||||||
let mut arg_values = Vec::with_capacity(args.len());
|
let mut arg_values = Vec::with_capacity(args.len());
|
||||||
|
|
||||||
|
// we implement primitive operations by first evaluating each of the
|
||||||
|
// arguments to the function, and then gathering up all the values
|
||||||
|
// produced.
|
||||||
for arg in args.iter() {
|
for arg in args.iter() {
|
||||||
match arg {
|
match arg {
|
||||||
ValueOrRef::Ref(_, n) => arg_values.push(env.lookup(n.clone())?),
|
ValueOrRef::Ref(_, n) => arg_values.push(env.lookup(n.clone())?),
|
||||||
@@ -48,6 +55,8 @@ impl Expression {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// and then finally we call `calculate` to run them. trust me, it's nice
|
||||||
|
// to not have to deal with all the nonsense hidden under `calculate`.
|
||||||
match op {
|
match op {
|
||||||
Primitive::Plus => Ok(Value::calculate("+", arg_values)?),
|
Primitive::Plus => Ok(Value::calculate("+", arg_values)?),
|
||||||
Primitive::Minus => Ok(Value::calculate("-", arg_values)?),
|
Primitive::Minus => Ok(Value::calculate("-", arg_values)?),
|
||||||
@@ -62,7 +71,7 @@ impl Expression {
|
|||||||
#[test]
|
#[test]
|
||||||
fn two_plus_three() {
|
fn two_plus_three() {
|
||||||
let input = crate::syntax::Program::parse(0, "x = 2 + 3; print x;").expect("parse works");
|
let input = crate::syntax::Program::parse(0, "x = 2 + 3; print x;").expect("parse works");
|
||||||
let ir = Program::from(input.simplify());
|
let ir = Program::from(input);
|
||||||
let output = ir.eval().expect("runs successfully");
|
let output = ir.eval().expect("runs successfully");
|
||||||
assert_eq!("x = 5i64\n", &output);
|
assert_eq!("x = 5i64\n", &output);
|
||||||
}
|
}
|
||||||
@@ -71,7 +80,7 @@ fn two_plus_three() {
|
|||||||
fn lotsa_math() {
|
fn lotsa_math() {
|
||||||
let input =
|
let input =
|
||||||
crate::syntax::Program::parse(0, "x = 2 + 3 * 10 / 5 - 1; print x;").expect("parse works");
|
crate::syntax::Program::parse(0, "x = 2 + 3 * 10 / 5 - 1; print x;").expect("parse works");
|
||||||
let ir = Program::from(input.simplify());
|
let ir = Program::from(input);
|
||||||
let output = ir.eval().expect("runs successfully");
|
let output = ir.eval().expect("runs successfully");
|
||||||
assert_eq!("x = 7i64\n", &output);
|
assert_eq!("x = 7i64\n", &output);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,82 +1,185 @@
|
|||||||
use internment::ArcIntern;
|
use internment::ArcIntern;
|
||||||
|
use std::sync::atomic::AtomicUsize;
|
||||||
|
|
||||||
use crate::ir::ast as ir;
|
use crate::ir::ast as ir;
|
||||||
use crate::syntax::ast as syntax;
|
use crate::syntax;
|
||||||
|
|
||||||
|
use super::ValueOrRef;
|
||||||
|
|
||||||
impl From<syntax::Program> for ir::Program {
|
impl From<syntax::Program> for ir::Program {
|
||||||
|
/// We implement the top-level conversion of a syntax::Program into an
|
||||||
|
/// ir::Program using just the standard `From::from`, because we don't
|
||||||
|
/// need to return any arguments and we shouldn't produce any errors.
|
||||||
|
/// Technically there's an `unwrap` deep under the hood that we could
|
||||||
|
/// float out, but the validator really should've made sure that never
|
||||||
|
/// happens, so we're just going to assume.
|
||||||
fn from(mut value: syntax::Program) -> Self {
|
fn from(mut value: syntax::Program) -> Self {
|
||||||
ir::Program {
|
let mut statements = Vec::new();
|
||||||
statements: value.statements.drain(..).map(Into::into).collect(),
|
|
||||||
|
for stmt in value.statements.drain(..) {
|
||||||
|
statements.append(&mut stmt.simplify());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ir::Program { statements }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Vec<syntax::Statement>> for ir::Program {
|
impl From<syntax::Statement> for ir::Program {
|
||||||
fn from(mut value: Vec<syntax::Statement>) -> Self {
|
/// One interesting thing about this conversion is that there isn't
|
||||||
ir::Program {
|
/// a natural translation from syntax::Statement to ir::Statement,
|
||||||
statements: value.drain(..).map(Into::into).collect(),
|
/// because the syntax version can have nested expressions and the
|
||||||
}
|
/// IR version can't.
|
||||||
}
|
///
|
||||||
}
|
/// As a result, we can naturally convert a syntax::Statement into
|
||||||
|
/// an ir::Program, because we can allow the additional binding
|
||||||
impl From<syntax::Statement> for ir::Statement {
|
/// sites to be generated, instead. And, bonus, it turns out that
|
||||||
|
/// this is what we wanted anyways.
|
||||||
fn from(value: syntax::Statement) -> Self {
|
fn from(value: syntax::Statement) -> Self {
|
||||||
match value {
|
ir::Program {
|
||||||
syntax::Statement::Binding(loc, name, expr) => {
|
statements: value.simplify(),
|
||||||
ir::Statement::Binding(loc, ArcIntern::from(name), ir::Expression::from(expr))
|
|
||||||
}
|
|
||||||
syntax::Statement::Print(loc, name) => ir::Statement::Print(loc, ArcIntern::from(name)),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<syntax::Expression> for ir::Expression {
|
impl syntax::Statement {
|
||||||
fn from(value: syntax::Expression) -> Self {
|
/// Simplify a syntax::Statement into a series of ir::Statements.
|
||||||
match value {
|
///
|
||||||
syntax::Expression::Primitive(loc, name, mut exprs) => ir::Expression::Primitive(
|
/// The reason this function is one-to-many is because we may have to
|
||||||
|
/// introduce new binding sites in order to avoid having nested
|
||||||
|
/// expressions. Nested expressions, like `(1 + 2) * 3`, are allowed
|
||||||
|
/// in syntax::Expression but are expressly *not* allowed in
|
||||||
|
/// ir::Expression. So this pass converts them into bindings, like
|
||||||
|
/// this:
|
||||||
|
///
|
||||||
|
/// x = (1 + 2) * 3;
|
||||||
|
///
|
||||||
|
/// ==>
|
||||||
|
///
|
||||||
|
/// x:1 = 1 + 2;
|
||||||
|
/// x:2 = x:1 * 3;
|
||||||
|
/// x = x:2
|
||||||
|
///
|
||||||
|
/// Thus ensuring that things are nice and simple. Note that the
|
||||||
|
/// binding of `x:2` is not, strictly speaking, necessary, but it
|
||||||
|
/// makes the code below much easier to read.
|
||||||
|
fn simplify(self) -> Vec<ir::Statement> {
|
||||||
|
let mut new_statements = vec![];
|
||||||
|
|
||||||
|
match self {
|
||||||
|
// Print statements we don't have to do much with
|
||||||
|
syntax::Statement::Print(loc, name) => {
|
||||||
|
new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bindings, however, may involve a single expression turning into
|
||||||
|
// a series of statements and then an expression.
|
||||||
|
syntax::Statement::Binding(loc, name, value) => {
|
||||||
|
let (mut prereqs, new_value) = value.rebind(&name);
|
||||||
|
new_statements.append(&mut prereqs);
|
||||||
|
new_statements.push(ir::Statement::Binding(
|
||||||
loc,
|
loc,
|
||||||
ir::Primitive::try_from(name.as_str()).unwrap(),
|
ArcIntern::new(name),
|
||||||
exprs.drain(..).map(Into::into).collect(),
|
new_value.into(),
|
||||||
),
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
new_statements
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl syntax::Expression {
|
||||||
|
/// This actually does the meat of the simplification work, here, by rebinding
|
||||||
|
/// any nested expressions into their own variables. We have this return
|
||||||
|
/// `ValueOrRef` in all cases because it makes for slightly less code; in the
|
||||||
|
/// case when we actually want an `Expression`, we can just use `into()`.
|
||||||
|
fn rebind(self, base_name: &str) -> (Vec<ir::Statement>, ir::ValueOrRef) {
|
||||||
|
match self {
|
||||||
|
// Values just convert in the obvious way, and require no prereqs
|
||||||
|
syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())),
|
||||||
|
|
||||||
|
// Similarly, references just convert in the obvious way, and require
|
||||||
|
// no prereqs
|
||||||
syntax::Expression::Reference(loc, name) => {
|
syntax::Expression::Reference(loc, name) => {
|
||||||
ir::Expression::Reference(loc, ArcIntern::from(name))
|
(vec![], ValueOrRef::Ref(loc, ArcIntern::new(name)))
|
||||||
}
|
|
||||||
syntax::Expression::Value(loc, value) => {
|
|
||||||
ir::Expression::Value(loc, ir::Value::from(value))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<syntax::Expression> for ir::ValueOrRef {
|
// Primitive expressions are where we do the real work.
|
||||||
fn from(value: syntax::Expression) -> Self {
|
syntax::Expression::Primitive(loc, prim, mut expressions) => {
|
||||||
match value {
|
// generate a fresh new name for the binding site we're going to
|
||||||
syntax::Expression::Primitive(loc, _, _) => {
|
// introduce, basing the name on wherever we came from; so if this
|
||||||
panic!("{:?}: couldn't convert to valueorref", loc)
|
// expression was bound to `x` originally, it might become `x:23`.
|
||||||
|
//
|
||||||
|
// gensym is guaranteed to give us a name that is unused anywhere
|
||||||
|
// else in the program.
|
||||||
|
let new_name = gensym(base_name);
|
||||||
|
let mut prereqs = Vec::new();
|
||||||
|
let mut new_exprs = Vec::new();
|
||||||
|
|
||||||
|
// here we loop through every argument, and recurse on the expressions
|
||||||
|
// we find. that will give us any new binding sites that *they* introduce,
|
||||||
|
// and a simple value or reference that we can use in our result.
|
||||||
|
for expr in expressions.drain(..) {
|
||||||
|
let (mut cur_prereqs, arg) = expr.rebind(new_name.as_str());
|
||||||
|
prereqs.append(&mut cur_prereqs);
|
||||||
|
new_exprs.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
syntax::Expression::Reference(loc, var) => {
|
// now we're going to use those new arguments to run the primitive, binding
|
||||||
ir::ValueOrRef::Ref(loc, ArcIntern::new(var))
|
// the results to the new variable we introduced.
|
||||||
}
|
let prim =
|
||||||
|
ir::Primitive::try_from(prim.as_str()).expect("is valid primitive function");
|
||||||
|
prereqs.push(ir::Statement::Binding(
|
||||||
|
loc.clone(),
|
||||||
|
new_name.clone(),
|
||||||
|
ir::Expression::Primitive(loc.clone(), prim, new_exprs),
|
||||||
|
));
|
||||||
|
|
||||||
syntax::Expression::Value(loc, val) => ir::ValueOrRef::Value(loc, val.into()),
|
// and finally, we can return all the new bindings, and a reference to
|
||||||
|
// the variable we just introduced to hold the value of the primitive
|
||||||
|
// invocation.
|
||||||
|
(prereqs, ValueOrRef::Ref(loc, new_name))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<syntax::Value> for ir::Value {
|
impl From<syntax::Value> for ir::Value {
|
||||||
fn from(x: syntax::Value) -> Self {
|
fn from(value: syntax::Value) -> Self {
|
||||||
match x {
|
match value {
|
||||||
syntax::Value::Number(base, value) => ir::Value::Number(base, value),
|
syntax::Value::Number(base, val) => ir::Value::Number(base, val),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<String> for ir::Primitive {
|
||||||
|
fn from(value: String) -> Self {
|
||||||
|
value.try_into().unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generate a fresh new name based on the given name.
|
||||||
|
///
|
||||||
|
/// The new name is guaranteed to be unique across the entirety of the
|
||||||
|
/// execution. This is achieved by using characters in the variable name
|
||||||
|
/// that would not be valid input, and by including a counter that is
|
||||||
|
/// incremented on every invocation.
|
||||||
|
fn gensym(name: &str) -> ArcIntern<String> {
|
||||||
|
static COUNTER: AtomicUsize = AtomicUsize::new(0);
|
||||||
|
|
||||||
|
let new_name = format!(
|
||||||
|
"<{}:{}>",
|
||||||
|
name,
|
||||||
|
COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
|
||||||
|
);
|
||||||
|
ArcIntern::new(new_name)
|
||||||
|
}
|
||||||
|
|
||||||
proptest::proptest! {
|
proptest::proptest! {
|
||||||
#[test]
|
#[test]
|
||||||
fn translation_maintains_semantics(input: syntax::Program) {
|
fn translation_maintains_semantics(input: syntax::Program) {
|
||||||
let syntax_result = input.eval();
|
let syntax_result = input.eval();
|
||||||
let ir = ir::Program::from(input.simplify());
|
let ir = ir::Program::from(input);
|
||||||
let ir_result = ir.eval();
|
let ir_result = ir.eval();
|
||||||
assert_eq!(syntax_result, ir_result);
|
assert_eq!(syntax_result, ir_result);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,10 @@ use internment::ArcIntern;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
impl Program {
|
impl Program {
|
||||||
|
/// Get the complete list of strings used within the program.
|
||||||
|
///
|
||||||
|
/// For the purposes of this function, strings are the variables used in
|
||||||
|
/// `print` statements.
|
||||||
pub fn strings(&self) -> HashSet<ArcIntern<String>> {
|
pub fn strings(&self) -> HashSet<ArcIntern<String>> {
|
||||||
let mut result = HashSet::new();
|
let mut result = HashSet::new();
|
||||||
|
|
||||||
|
|||||||
71
src/lib.rs
71
src/lib.rs
@@ -1,4 +1,75 @@
|
|||||||
|
//! # NGR (No Good Reason) Compiler
|
||||||
|
//!
|
||||||
|
//! This is the top-level module for the NGR compiler; a compiler written
|
||||||
|
//! in Rust for no good reason. I may eventually try to turn this into a
|
||||||
|
//! basic guide for writing compilers, but for now it's a fairly silly
|
||||||
|
//! (although complete) language and implementation, featuring:
|
||||||
|
//!
|
||||||
|
//! * Variable binding with basic arithmetic operators.
|
||||||
|
//! * The ability to print variable values.
|
||||||
|
//!
|
||||||
|
//! I'll be extending this list into the future, with the eventual goal of
|
||||||
|
//! being able to implement basic programming tasks with it. For example,
|
||||||
|
//! I have a goal of eventually writing reasonably-clear
|
||||||
|
//! [Advent of Code](https://adventofcode.com/) implementations with it.
|
||||||
|
//!
|
||||||
|
//! Users of this as a library will want to choose their adventure based
|
||||||
|
//! on how much they want to customize their experience; I've defaulted
|
||||||
|
//! to providing the ability to see internals, rather than masking them,
|
||||||
|
//! so folks can play with things as they see fit.
|
||||||
|
//!
|
||||||
|
//! ## Easy Mode - Just Running a REPL or Compiler
|
||||||
|
//!
|
||||||
|
//! For easiest use, you will want to use either the [`Compiler`] object
|
||||||
|
//! or the [`REPL`] object.
|
||||||
|
//!
|
||||||
|
//! As you might expect, the [`Compiler`] object builds a compiler, which
|
||||||
|
//! can be re-used to compile as many files as you'd like. Right now,
|
||||||
|
//! that's all it does. (TODO: Add a linker function to it.)
|
||||||
|
//!
|
||||||
|
//! The [`REPL`] object implements the core of what you'll need to
|
||||||
|
//! implement a just-in-time compiled read-eval-print loop. It will
|
||||||
|
//! maintain variable state and make sure that variables are linked
|
||||||
|
//! appropriately as the loop progresses.
|
||||||
|
//!
|
||||||
|
//! ## Hard Mode - Looking at the individual passes
|
||||||
|
//!
|
||||||
|
//! This compiler is broken into three core parts:
|
||||||
|
//!
|
||||||
|
//! 1. The front-end / syntax engine. This portion of the compiler is
|
||||||
|
//! responsible for turning basic strings (or files) into a machine-
|
||||||
|
//! friendly abstract syntax tree. See the [`syntax`] module for
|
||||||
|
//! more information.
|
||||||
|
//! 2. The IR. This portion of the compiler will be responsible for
|
||||||
|
//! high-level code analysis and transformation ... although for
|
||||||
|
//! now, it doesn't do much at all. See the [`ir`] module for more
|
||||||
|
//! information.
|
||||||
|
//! 3. The Backend implementation. This portion of the compiler turns
|
||||||
|
//! the IR from the previous section into Cranelift structures, and
|
||||||
|
//! helps with either compiling them via JIT or statically compiling
|
||||||
|
//! them into a file. The [`backend`] module also contains information
|
||||||
|
//! about the runtime functions made available to the user.
|
||||||
|
//!
|
||||||
|
//! ## Testing
|
||||||
|
//!
|
||||||
|
//! Testing is a key focus of this effort. To that end, both the syntax
|
||||||
|
//! tree used in the syntax module and the IR used in the middle of the
|
||||||
|
//! compiler implement `Arbitrary`, and are subject to property-based
|
||||||
|
//! testing to make sure that various passes work properly.
|
||||||
|
//!
|
||||||
|
//! In addition, to support basic equivalence testing, we include support
|
||||||
|
//! for evaluating all expressions. The [`eval`] module provides some
|
||||||
|
//! utility support for this work.
|
||||||
|
//!
|
||||||
pub mod backend;
|
pub mod backend;
|
||||||
pub mod eval;
|
pub mod eval;
|
||||||
pub mod ir;
|
pub mod ir;
|
||||||
pub mod syntax;
|
pub mod syntax;
|
||||||
|
|
||||||
|
/// Implementation module for the high-level compiler.
|
||||||
|
mod compiler;
|
||||||
|
/// Implementation module for the high-level REPL.
|
||||||
|
mod repl;
|
||||||
|
|
||||||
|
pub use crate::compiler::Compiler;
|
||||||
|
pub use crate::repl::REPL;
|
||||||
|
|||||||
166
src/repl.rs
Normal file
166
src/repl.rs
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
use crate::backend::{Backend, BackendError};
|
||||||
|
use crate::ir::Program as IR;
|
||||||
|
use crate::syntax::{Location, ParserError, Statement};
|
||||||
|
use codespan_reporting::diagnostic::Diagnostic;
|
||||||
|
use codespan_reporting::files::SimpleFiles;
|
||||||
|
use codespan_reporting::term::{self, Config};
|
||||||
|
use cranelift_jit::JITModule;
|
||||||
|
use cranelift_module::ModuleError;
|
||||||
|
use pretty::termcolor::{ColorChoice, StandardStream};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// A high-level REPL helper for NGR.
|
||||||
|
///
|
||||||
|
/// This object holds most of the state required to implement some
|
||||||
|
/// form of interactive compiler for NGR; all you need to do is provide
|
||||||
|
/// the actual user IO.
|
||||||
|
///
|
||||||
|
/// For most console-based use cases, the [`Default`] implementation
|
||||||
|
/// should be sufficient; it prints any warnings or errors to `stdout`,
|
||||||
|
/// using a default color scheme that should work based on the terminal
|
||||||
|
/// type. For more complex interactions, though, you may want to use
|
||||||
|
/// the `REPL::new` function to provide your own print substrate.
|
||||||
|
pub struct REPL {
|
||||||
|
file_database: SimpleFiles<String, String>,
|
||||||
|
jitter: Backend<JITModule>,
|
||||||
|
variable_binding_sites: HashMap<String, Location>,
|
||||||
|
console: StandardStream,
|
||||||
|
console_config: Config,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for REPL {
|
||||||
|
fn default() -> Self {
|
||||||
|
let console = StandardStream::stdout(ColorChoice::Auto);
|
||||||
|
REPL::new(console, Config::default()).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::upper_case_acronyms)]
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
enum REPLError {
|
||||||
|
#[error("Error parsing statement: {0}")]
|
||||||
|
Parser(#[from] ParserError),
|
||||||
|
#[error("JIT error: {0}")]
|
||||||
|
JIT(#[from] BackendError),
|
||||||
|
#[error("Internal cranelift error: {0}")]
|
||||||
|
Cranelift(#[from] ModuleError),
|
||||||
|
#[error(transparent)]
|
||||||
|
Reporting(#[from] codespan_reporting::files::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<REPLError> for Diagnostic<usize> {
|
||||||
|
fn from(value: REPLError) -> Self {
|
||||||
|
match value {
|
||||||
|
REPLError::Parser(err) => Diagnostic::from(&err),
|
||||||
|
REPLError::JIT(err) => Diagnostic::from(err),
|
||||||
|
REPLError::Cranelift(err) => Diagnostic::bug().with_message(format!("{}", err)),
|
||||||
|
REPLError::Reporting(err) => Diagnostic::bug().with_message(format!("{}", err)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl REPL {
|
||||||
|
/// Construct a new REPL helper, using the given stream implementation and console configuration.
|
||||||
|
///
|
||||||
|
/// For most users, the [`Default::default`] implementation will be sufficient;
|
||||||
|
/// it will use `stdout` and a default console configuration. But if you need to
|
||||||
|
/// be more specific, this will help you provide more guidance to the REPL as it
|
||||||
|
/// evaluates things.
|
||||||
|
pub fn new(console: StandardStream, console_config: Config) -> Result<Self, BackendError> {
|
||||||
|
Ok(REPL {
|
||||||
|
file_database: SimpleFiles::new(),
|
||||||
|
jitter: Backend::jit(None)?,
|
||||||
|
variable_binding_sites: HashMap::new(),
|
||||||
|
console,
|
||||||
|
console_config,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit a diagnostic to the configured console.
|
||||||
|
///
|
||||||
|
/// This is just a convenience function; there's a lot of boilerplate in printing
|
||||||
|
/// diagnostics, and it was nice to pull it out into its own function.
|
||||||
|
fn emit_diagnostic(
|
||||||
|
&mut self,
|
||||||
|
diagnostic: Diagnostic<usize>,
|
||||||
|
) -> Result<(), codespan_reporting::files::Error> {
|
||||||
|
term::emit(
|
||||||
|
&mut self.console,
|
||||||
|
&self.console_config,
|
||||||
|
&self.file_database,
|
||||||
|
&diagnostic,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Process a line of input, printing any problems or the results.
|
||||||
|
///
|
||||||
|
/// The line number argument is just for a modicum of source information, to
|
||||||
|
/// provide to the user if some parsing or validation step fails. It can be
|
||||||
|
/// changed to be any value you like that provides some insight into what
|
||||||
|
/// failed, although it is probably a good idea for it to be different for
|
||||||
|
/// every invocation of this function. (Not critical, but a good idea.)
|
||||||
|
///
|
||||||
|
/// Any warnings or errors generated in processing this command will be
|
||||||
|
/// printed to the configured console. If there are no problems, the
|
||||||
|
/// command will be compiled and then executed.
|
||||||
|
pub fn process_input(&mut self, line_no: usize, command: String) {
|
||||||
|
if let Err(err) = self.process(line_no, command) {
|
||||||
|
if let Err(e) = self.emit_diagnostic(Diagnostic::from(err)) {
|
||||||
|
eprintln!(
|
||||||
|
"WOAH! System having trouble printing error messages. This is very bad. ({})",
|
||||||
|
e
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The internal implementation, with a handy `Result` type.
|
||||||
|
///
|
||||||
|
/// All information from the documentation of `REPL::process_input` applies here,
|
||||||
|
/// as well; this is the internal implementation of that function, which is
|
||||||
|
/// differentiated by returning a `Result` type that is hidden from the user
|
||||||
|
/// in the case of `REPL::process_input`.
|
||||||
|
fn process(&mut self, line_no: usize, command: String) -> Result<(), REPLError> {
|
||||||
|
let entry = self.file_database.add("entry".to_string(), command);
|
||||||
|
let source = self
|
||||||
|
.file_database
|
||||||
|
.get(entry)
|
||||||
|
.expect("entry exists")
|
||||||
|
.source();
|
||||||
|
let syntax = Statement::parse(entry, source)?;
|
||||||
|
|
||||||
|
// if this is a variable binding, and we've never defined this variable before,
|
||||||
|
// we should tell cranelift about it. this is optimistic; if we fail to compile,
|
||||||
|
// then we won't use this definition until someone tries again.
|
||||||
|
if let Statement::Binding(_, ref name, _) = syntax {
|
||||||
|
if !self.variable_binding_sites.contains_key(name.as_str()) {
|
||||||
|
self.jitter.define_string(name)?;
|
||||||
|
self.jitter.define_variable(name.clone())?;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let (mut errors, mut warnings) = syntax.validate(&mut self.variable_binding_sites);
|
||||||
|
let stop = !errors.is_empty();
|
||||||
|
let messages = errors
|
||||||
|
.drain(..)
|
||||||
|
.map(Into::into)
|
||||||
|
.chain(warnings.drain(..).map(Into::into));
|
||||||
|
|
||||||
|
for message in messages {
|
||||||
|
self.emit_diagnostic(message)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if stop {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let ir = IR::from(syntax);
|
||||||
|
let name = format!("line{}", line_no);
|
||||||
|
let function_id = self.jitter.compile_function(&name, ir)?;
|
||||||
|
self.jitter.module.finalize_definitions()?;
|
||||||
|
let compiled_bytes = self.jitter.bytes(function_id);
|
||||||
|
let compiled_function = unsafe { std::mem::transmute::<_, fn() -> ()>(compiled_bytes) };
|
||||||
|
compiled_function();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,12 +1,36 @@
|
|||||||
|
//! NGR Parsing: Reading input, turning it into sense (or errors).
|
||||||
|
//!
|
||||||
|
//! This module implements the front end of the compiler, which is responsible for
|
||||||
|
//! reading in NGR syntax as a string, turning it into a series of reasonable Rust
|
||||||
|
//! structures for us to manipulate, and doing some validation while it's at it.
|
||||||
|
//!
|
||||||
|
//! The core flow for this work is:
|
||||||
|
//!
|
||||||
|
//! * Turning the string into a series of language-specific [`Token`]s.
|
||||||
|
//! * Taking those tokens, and computing a basic syntax tree from them,
|
||||||
|
//! using our parser ([`ProgramParser`] or [`StatementParser`], generated
|
||||||
|
//! by [`lalrpop`](https://lalrpop.github.io/lalrpop/)).
|
||||||
|
//! * Validating the tree we have parsed, using [`Program::validate`],
|
||||||
|
//! returning any warnings or errors we have found.
|
||||||
|
//!
|
||||||
|
//! In addition to all of this, we make sure that the structures defined in this
|
||||||
|
//! module are all:
|
||||||
|
//!
|
||||||
|
//! * Instances of [`Pretty`](::pretty::Pretty), so that you can print stuff back
|
||||||
|
//! out that can be read by a human.
|
||||||
|
//! * Instances of [`Arbitrary`](proptest::prelude::Arbitrary), so they can be
|
||||||
|
//! used in `proptest`-based property testing. There are built-in tests in
|
||||||
|
//! the library, for example, to make sure that the pretty-printing round-trips.
|
||||||
|
//! * Can be evaluated using an `eval` function, for comparison with later
|
||||||
|
//! versions of the function downstream.
|
||||||
use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles};
|
use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles};
|
||||||
use lalrpop_util::lalrpop_mod;
|
use lalrpop_util::lalrpop_mod;
|
||||||
use logos::Logos;
|
use logos::Logos;
|
||||||
|
|
||||||
mod arbitrary;
|
mod arbitrary;
|
||||||
pub mod ast;
|
mod ast;
|
||||||
mod eval;
|
mod eval;
|
||||||
mod location;
|
mod location;
|
||||||
mod simplify;
|
|
||||||
mod tokens;
|
mod tokens;
|
||||||
lalrpop_mod!(
|
lalrpop_mod!(
|
||||||
#[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
|
#[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
|
||||||
@@ -18,7 +42,7 @@ mod validate;
|
|||||||
|
|
||||||
pub use crate::syntax::ast::*;
|
pub use crate::syntax::ast::*;
|
||||||
pub use crate::syntax::location::Location;
|
pub use crate::syntax::location::Location;
|
||||||
use crate::syntax::parser::ProgramParser;
|
pub use crate::syntax::parser::{ProgramParser, StatementParser};
|
||||||
pub use crate::syntax::tokens::{LexerError, Token};
|
pub use crate::syntax::tokens::{LexerError, Token};
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
use ::pretty::{Arena, Pretty};
|
use ::pretty::{Arena, Pretty};
|
||||||
@@ -29,33 +53,62 @@ use proptest::{prop_assert, prop_assert_eq};
|
|||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use self::parser::StatementParser;
|
/// One of the many errors that can occur when processing text input.
|
||||||
|
///
|
||||||
|
/// If you get one of these and want to display it to the user, we strongly
|
||||||
|
/// suggest using the [`From`] implementation to turn this into a [`Diagnostic`],
|
||||||
|
/// and then printing it via [`codespan_reporting`].
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum ParserError {
|
pub enum ParserError {
|
||||||
|
/// Raised by the lexer when we see some text that doesn't make
|
||||||
|
/// any sense in the language.
|
||||||
#[error("Invalid token")]
|
#[error("Invalid token")]
|
||||||
InvalidToken(Location),
|
InvalidToken(Location),
|
||||||
|
|
||||||
|
/// Raised when we're parsing the file and run into an EOF in a
|
||||||
|
/// place we really weren't expecting.
|
||||||
#[error("Unrecognized EOF")]
|
#[error("Unrecognized EOF")]
|
||||||
UnrecognizedEOF(Location, Vec<String>),
|
UnrecognizedEOF(Location, Vec<String>),
|
||||||
|
|
||||||
|
/// Raised when we're parsing the file, and run into a token in a
|
||||||
|
/// place we weren't expecting it.
|
||||||
#[error("Unrecognized token")]
|
#[error("Unrecognized token")]
|
||||||
UnrecognizedToken(Location, Location, Token, Vec<String>),
|
UnrecognizedToken(Location, Location, Token, Vec<String>),
|
||||||
|
|
||||||
|
/// Raised when we were expecting the end of the file, but instead
|
||||||
|
/// got another token.
|
||||||
#[error("Extra token")]
|
#[error("Extra token")]
|
||||||
ExtraToken(Location, Token, Location),
|
ExtraToken(Location, Token, Location),
|
||||||
|
|
||||||
|
/// Raised when the lexer just had some sort of internal problem
|
||||||
|
/// and just gave up.
|
||||||
#[error("Lexing failure")]
|
#[error("Lexing failure")]
|
||||||
LexFailure(Location),
|
LexFailure(Location),
|
||||||
|
|
||||||
|
/// Raised when we tried to reference a file, or add a file, to our
|
||||||
|
/// file database, and the database ran into a problem.
|
||||||
#[error("File database error")]
|
#[error("File database error")]
|
||||||
FileDatabaseError(#[from] codespan_reporting::files::Error),
|
FileDatabaseError(#[from] codespan_reporting::files::Error),
|
||||||
|
|
||||||
|
/// Raised when the OS is having problems giving us data.
|
||||||
#[error("Read error")]
|
#[error("Read error")]
|
||||||
ReadError(#[from] std::io::Error),
|
ReadError(#[from] std::io::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ParserError {
|
impl ParserError {
|
||||||
|
/// Convert one of lalrpop's parser errors into one of our own, which we can more
|
||||||
|
/// easily implement translation into [`Diagnostic`].
|
||||||
|
///
|
||||||
|
/// This function is relatively straightforward, because we match the errors pretty
|
||||||
|
/// closely. The major thing we do here is convert [`lalrpop`]'s notion of a location,
|
||||||
|
/// which is just an offset that it got from the lexer, into an actual location that
|
||||||
|
/// we can use in our [`Diagnostic`]s.
|
||||||
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
|
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
|
||||||
match err {
|
match err {
|
||||||
ParseError::InvalidToken { location } => {
|
ParseError::InvalidToken { location } => {
|
||||||
ParserError::InvalidToken(Location::new(file_idx, location))
|
ParserError::InvalidToken(Location::new(file_idx, location))
|
||||||
}
|
}
|
||||||
ParseError::UnrecognizedEOF { location, expected } => {
|
ParseError::UnrecognizedEof { location, expected } => {
|
||||||
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
|
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
|
||||||
}
|
}
|
||||||
ParseError::UnrecognizedToken {
|
ParseError::UnrecognizedToken {
|
||||||
@@ -83,6 +136,10 @@ impl ParserError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This is just a nice little function to print out what we expected, if
|
||||||
|
/// we had some expectations. Because English is a little wonky, there's
|
||||||
|
/// some odd stuff with whether we get 0, 1, 2, or more, and it's nice to
|
||||||
|
/// just split that bit of logic out.
|
||||||
fn display_expected(expected: &[String]) -> String {
|
fn display_expected(expected: &[String]) -> String {
|
||||||
match expected.len() {
|
match expected.len() {
|
||||||
0 => "".to_string(),
|
0 => "".to_string(),
|
||||||
@@ -96,6 +153,8 @@ fn display_expected(expected: &[String]) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given a list of strings, separate them with commas (and a space), as in an
|
||||||
|
/// English list.
|
||||||
fn comma_separate(strings: &[String]) -> String {
|
fn comma_separate(strings: &[String]) -> String {
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
|
|
||||||
@@ -125,12 +184,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
|
|||||||
let expected_str =
|
let expected_str =
|
||||||
format!("unexpected token {}{}", token, display_expected(expected));
|
format!("unexpected token {}{}", token, display_expected(expected));
|
||||||
let unexpected_str = format!("unexpected token {}", token);
|
let unexpected_str = format!("unexpected token {}", token);
|
||||||
let mut labels = start.range_label(end);
|
let labels = start.range_label(end);
|
||||||
|
|
||||||
Diagnostic::error()
|
Diagnostic::error()
|
||||||
.with_labels(
|
.with_labels(
|
||||||
labels
|
labels
|
||||||
.drain(..)
|
.into_iter()
|
||||||
.map(|l| l.with_message(unexpected_str.clone()))
|
.map(|l| l.with_message(unexpected_str.clone()))
|
||||||
.collect(),
|
.collect(),
|
||||||
)
|
)
|
||||||
@@ -142,12 +201,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
|
|||||||
let expected_str =
|
let expected_str =
|
||||||
format!("unexpected token {} after the expected end of file", token);
|
format!("unexpected token {} after the expected end of file", token);
|
||||||
let unexpected_str = format!("unexpected token {}", token);
|
let unexpected_str = format!("unexpected token {}", token);
|
||||||
let mut labels = start.range_label(end);
|
let labels = start.range_label(end);
|
||||||
|
|
||||||
Diagnostic::error()
|
Diagnostic::error()
|
||||||
.with_labels(
|
.with_labels(
|
||||||
labels
|
labels
|
||||||
.drain(..)
|
.into_iter()
|
||||||
.map(|l| l.with_message(unexpected_str.clone()))
|
.map(|l| l.with_message(unexpected_str.clone()))
|
||||||
.collect(),
|
.collect(),
|
||||||
)
|
)
|
||||||
@@ -167,6 +226,14 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Program {
|
impl Program {
|
||||||
|
/// Parse the given file, adding it to the database as part of the process.
|
||||||
|
///
|
||||||
|
/// This operation reads the file from disk and adds it to the database for future
|
||||||
|
/// reference. If you get an error, we strongly suggest conversion to [`Diagnostic`]
|
||||||
|
/// and then reporting it to the user via [`codespan_reporting`]. You should use
|
||||||
|
/// this function if you're pretty sure that you've never seen this file before,
|
||||||
|
/// and [`Program::parse`] if you have and know its index and already have it in
|
||||||
|
/// memory.
|
||||||
pub fn parse_file(
|
pub fn parse_file(
|
||||||
file_database: &mut SimpleFiles<String, String>,
|
file_database: &mut SimpleFiles<String, String>,
|
||||||
file_name: &str,
|
file_name: &str,
|
||||||
@@ -177,6 +244,11 @@ impl Program {
|
|||||||
Program::parse(file_handle, file_db_info.source())
|
Program::parse(file_handle, file_db_info.source())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a block of text you have in memory, using the given index for [`Location`]s.
|
||||||
|
///
|
||||||
|
/// If you use a nonsensical file index, everything will work fine until you try to
|
||||||
|
/// report an error, at which point [`codespan_reporting`] may have some nasty things
|
||||||
|
/// to say to you.
|
||||||
pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
|
pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
|
||||||
let lexer = Token::lexer(buffer)
|
let lexer = Token::lexer(buffer)
|
||||||
.spanned()
|
.spanned()
|
||||||
@@ -188,6 +260,12 @@ impl Program {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Statement {
|
impl Statement {
|
||||||
|
/// Parse a statement that you have in memory, using the given index for [`Location`]s.
|
||||||
|
///
|
||||||
|
/// As with [`Program::parse`], if you use a bad file index, you'll get weird behaviors
|
||||||
|
/// when you try to print errors, but things should otherwise work fine. This function
|
||||||
|
/// will only parse a single statement, which is useful in the REPL, but probably shouldn't
|
||||||
|
/// be used when reading in whole files.
|
||||||
pub fn parse(file_idx: usize, buffer: &str) -> Result<Statement, ParserError> {
|
pub fn parse(file_idx: usize, buffer: &str) -> Result<Statement, ParserError> {
|
||||||
let lexer = Token::lexer(buffer)
|
let lexer = Token::lexer(buffer)
|
||||||
.spanned()
|
.spanned()
|
||||||
|
|||||||
@@ -1,12 +1,32 @@
|
|||||||
use crate::syntax::Location;
|
use crate::syntax::Location;
|
||||||
|
|
||||||
|
/// The set of valid binary operators.
|
||||||
pub static BINARY_OPERATORS: &[&str] = &["+", "-", "*", "/"];
|
pub static BINARY_OPERATORS: &[&str] = &["+", "-", "*", "/"];
|
||||||
|
|
||||||
|
/// A structure representing a parsed program.
|
||||||
|
///
|
||||||
|
/// One `Program` is associated with exactly one input file, and the
|
||||||
|
/// vector is arranged in exactly the same order as the parsed file.
|
||||||
|
/// Because this is the syntax layer, the program is guaranteed to be
|
||||||
|
/// syntactically valid, but may be nonsense. There could be attempts
|
||||||
|
/// to use unbound variables, for example, until after someone runs
|
||||||
|
/// `validate` and it comes back without errors.
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub struct Program {
|
pub struct Program {
|
||||||
pub statements: Vec<Statement>,
|
pub statements: Vec<Statement>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A parsed statement.
|
||||||
|
///
|
||||||
|
/// Statements are guaranteed to be syntactically valid, but may be
|
||||||
|
/// complete nonsense at the semantic level. Which is to say, all the
|
||||||
|
/// print statements were correctly formatted, and all the variables
|
||||||
|
/// referenced are definitely valid symbols, but they may not have
|
||||||
|
/// been defined or anything.
|
||||||
|
///
|
||||||
|
/// Note that equivalence testing on statements is independent of
|
||||||
|
/// source location; it is testing if the two statements say the same
|
||||||
|
/// thing, not if they are the exact same statement.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Statement {
|
pub enum Statement {
|
||||||
Binding(Location, String, Expression),
|
Binding(Location, String, Expression),
|
||||||
@@ -28,6 +48,12 @@ impl PartialEq for Statement {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An expression in the underlying syntax.
|
||||||
|
///
|
||||||
|
/// Like statements, these expressions are guaranteed to have been
|
||||||
|
/// formatted correctly, but may not actually make any sense. Also
|
||||||
|
/// like Statements, the [`PartialEq`] implementation does not take
|
||||||
|
/// source positions into account.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Expression {
|
pub enum Expression {
|
||||||
Value(Location, Value),
|
Value(Location, Value),
|
||||||
@@ -54,7 +80,9 @@ impl PartialEq for Expression {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A value from the source syntax
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
|
/// The value of the number, and an optional base that it was written in
|
||||||
Number(Option<u8>, i64),
|
Number(Option<u8>, i64),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,11 +4,23 @@ use crate::eval::{EvalEnvironment, EvalError, Value};
|
|||||||
use crate::syntax::{Expression, Program, Statement};
|
use crate::syntax::{Expression, Program, Statement};
|
||||||
|
|
||||||
impl Program {
|
impl Program {
|
||||||
|
/// Evaluate the program, returning either an error or what it prints out when run.
|
||||||
|
///
|
||||||
|
/// Doing this evaluation is particularly useful for testing, to ensure that if we
|
||||||
|
/// modify a program in some way it does the same thing on both sides of the
|
||||||
|
/// transformation. It's also sometimes just nice to know what a program will be
|
||||||
|
/// doing.
|
||||||
|
///
|
||||||
|
/// Note that the errors here are slightly stricter than what we enforce at runtime.
|
||||||
|
/// For example, we check for overflow and underflow errors during evaluation, and
|
||||||
|
/// we don't check for those in the compiled code.
|
||||||
pub fn eval(&self) -> Result<String, EvalError> {
|
pub fn eval(&self) -> Result<String, EvalError> {
|
||||||
let mut env = EvalEnvironment::empty();
|
let mut env = EvalEnvironment::empty();
|
||||||
let mut stdout = String::new();
|
let mut stdout = String::new();
|
||||||
|
|
||||||
for stmt in self.statements.iter() {
|
for stmt in self.statements.iter() {
|
||||||
|
// at this point, evaluation is pretty simple. just walk through each
|
||||||
|
// statement, in order, and record printouts as we come to them.
|
||||||
match stmt {
|
match stmt {
|
||||||
Statement::Binding(_, name, value) => {
|
Statement::Binding(_, name, value) => {
|
||||||
let actual_value = value.eval(&env)?;
|
let actual_value = value.eval(&env)?;
|
||||||
@@ -40,6 +52,7 @@ impl Expression {
|
|||||||
let mut arg_values = Vec::with_capacity(args.len());
|
let mut arg_values = Vec::with_capacity(args.len());
|
||||||
|
|
||||||
for arg in args.iter() {
|
for arg in args.iter() {
|
||||||
|
// yay, recursion! makes this pretty straightforward
|
||||||
arg_values.push(arg.eval(env)?);
|
arg_values.push(arg.eval(env)?);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
use codespan_reporting::diagnostic::{Diagnostic, Label};
|
use codespan_reporting::diagnostic::{Diagnostic, Label};
|
||||||
|
|
||||||
|
/// A source location, for use in pointing users towards warnings and errors.
|
||||||
|
///
|
||||||
|
/// Internally, locations are very tied to the `codespan_reporting` library,
|
||||||
|
/// and the primary use of them is to serve as anchors within that library.
|
||||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
pub struct Location {
|
pub struct Location {
|
||||||
file_idx: usize,
|
file_idx: usize,
|
||||||
@@ -7,10 +11,22 @@ pub struct Location {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Location {
|
impl Location {
|
||||||
|
/// Generate a new `Location` from a file index and an offset from the
|
||||||
|
/// start of the file.
|
||||||
|
///
|
||||||
|
/// The file index is based on the file database being used. See the
|
||||||
|
/// `codespan_reporting::files::SimpleFiles::add` function, which is
|
||||||
|
/// normally where we get this index.
|
||||||
pub fn new(file_idx: usize, offset: usize) -> Self {
|
pub fn new(file_idx: usize, offset: usize) -> Self {
|
||||||
Location { file_idx, offset }
|
Location { file_idx, offset }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate a `Location` for a completely manufactured bit of code.
|
||||||
|
///
|
||||||
|
/// Ideally, this is used only in testing, as any code we generate as
|
||||||
|
/// part of the compiler should, theoretically, be tied to some actual
|
||||||
|
/// location in the source code. That being said, this can be used in
|
||||||
|
/// a pinch ... just maybe try to avoid it if you can.
|
||||||
pub fn manufactured() -> Self {
|
pub fn manufactured() -> Self {
|
||||||
Location {
|
Location {
|
||||||
file_idx: 0,
|
file_idx: 0,
|
||||||
@@ -18,27 +34,73 @@ impl Location {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate a primary label for a [`Diagnostic`], based on this source
|
||||||
|
/// location.
|
||||||
|
///
|
||||||
|
/// Note, this is just the [`Label`], you'll want to fill in the [`Diagnostic`]
|
||||||
|
/// with a lot more information.
|
||||||
|
///
|
||||||
|
/// Primary labels are the things that are the key cause of the message.
|
||||||
|
/// If, for example, it was an error to bind a variable named "x", and
|
||||||
|
/// then have another binding of a variable named "x", the second one
|
||||||
|
/// would likely be the primary label (because that's where the error
|
||||||
|
/// actually happened), but you'd probably want to make the first location
|
||||||
|
/// the secondary label to help users find it.
|
||||||
pub fn primary_label(&self) -> Label<usize> {
|
pub fn primary_label(&self) -> Label<usize> {
|
||||||
Label::primary(self.file_idx, self.offset..self.offset)
|
Label::primary(self.file_idx, self.offset..self.offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate a secondary label for a [`Diagnostic`], based on this source
|
||||||
|
/// location.
|
||||||
|
///
|
||||||
|
/// Note, this is just the [`Label`], you'll want to fill in the [`Diagnostic`]
|
||||||
|
/// with a lot more information.
|
||||||
|
///
|
||||||
|
/// Secondary labels are the things that are involved in the message, but
|
||||||
|
/// aren't necessarily a problem in and of themselves. If, for example, it
|
||||||
|
/// was an error to bind a variable named "x", and then have another binding
|
||||||
|
/// of a variable named "x", the second one would likely be the primary
|
||||||
|
/// label (because that's where the error actually happened), but you'd
|
||||||
|
/// probably want to make the first location the secondary label to help
|
||||||
|
/// users find it.
|
||||||
pub fn secondary_label(&self) -> Label<usize> {
|
pub fn secondary_label(&self) -> Label<usize> {
|
||||||
Label::secondary(self.file_idx, self.offset..self.offset)
|
Label::secondary(self.file_idx, self.offset..self.offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn range_label(&self, end: &Location) -> Vec<Label<usize>> {
|
/// Given this location and another, generate a primary label that
|
||||||
if self.file_idx == end.file_idx {
|
/// specifies the area between those two locations.
|
||||||
vec![Label::primary(self.file_idx, self.offset..end.offset)]
|
///
|
||||||
} else if self.file_idx == 0 {
|
/// See [`Self::primary_label`] for some discussion of primary versus
|
||||||
// if this is a manufactured item, then ... just try the other one
|
/// secondary labels. If the two locations are the same, this method does
|
||||||
vec![Label::primary(end.file_idx, end.offset..end.offset)]
|
/// the exact same thing as [`Self::primary_label`]. If this item was
|
||||||
|
/// generated by [`Self::manufactured`], it will act as if you'd called
|
||||||
|
/// `primary_label` on the argument. Otherwise, it will generate the obvious
|
||||||
|
/// span.
|
||||||
|
///
|
||||||
|
/// This function will return `None` only in the case that you provide
|
||||||
|
/// locations from two different files, which it cannot sensibly handle.
|
||||||
|
pub fn range_label(&self, end: &Location) -> Option<Label<usize>> {
|
||||||
|
if self.file_idx == 0 {
|
||||||
|
return Some(end.primary_label());
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.file_idx != end.file_idx {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.offset > end.offset {
|
||||||
|
Some(Label::primary(self.file_idx, end.offset..self.offset))
|
||||||
} else {
|
} else {
|
||||||
// we'll just pick the first location if this is in two different
|
Some(Label::primary(self.file_idx, self.offset..end.offset))
|
||||||
// files
|
|
||||||
vec![Label::primary(self.file_idx, self.offset..self.offset)]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return an error diagnostic centered at this location.
|
||||||
|
///
|
||||||
|
/// Note that this [`Diagnostic`] will have no information associated with
|
||||||
|
/// it other than that (a) there is an error, and (b) that the error is at
|
||||||
|
/// this particular location. You'll need to extend it with actually useful
|
||||||
|
/// information, like what kind of error it is.
|
||||||
pub fn error(&self) -> Diagnostic<usize> {
|
pub fn error(&self) -> Diagnostic<usize> {
|
||||||
Diagnostic::error().with_labels(vec![Label::primary(
|
Diagnostic::error().with_labels(vec![Label::primary(
|
||||||
self.file_idx,
|
self.file_idx,
|
||||||
@@ -46,6 +108,12 @@ impl Location {
|
|||||||
)])
|
)])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return an error diagnostic centered at this location, with the given message.
|
||||||
|
///
|
||||||
|
/// This is much more useful than [`Self::error`], because it actually provides
|
||||||
|
/// the user with some guidance. That being said, you still might want to add
|
||||||
|
/// even more information to it, using [`Diagnostic::with_labels`],
|
||||||
|
/// [`Diagnostic::with_notes`], or [`Diagnostic::with_code`].
|
||||||
pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> {
|
pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> {
|
||||||
Diagnostic::error().with_labels(vec![Label::primary(
|
Diagnostic::error().with_labels(vec![Label::primary(
|
||||||
self.file_idx,
|
self.file_idx,
|
||||||
|
|||||||
@@ -1,14 +1,32 @@
|
|||||||
|
//! The parser for NGR!
|
||||||
|
//!
|
||||||
|
//! This file contains the grammar for the NGR language; a grammar is a nice,
|
||||||
|
//! machine-readable way to describe how your language's syntax works. For
|
||||||
|
//! example, here we describe a program as a series of statements, statements
|
||||||
|
//! as either variable binding or print statements, etc. As the grammar gets
|
||||||
|
//! more complicated, using tools like [`lalrpop`] becomes even more important.
|
||||||
|
//! (Although, at some point, things can become so complicated that you might
|
||||||
|
//! eventually want to leave lalrpop behind.)
|
||||||
|
//!
|
||||||
use crate::syntax::{LexerError, Location};
|
use crate::syntax::{LexerError, Location};
|
||||||
use crate::syntax::ast::{Program,Statement,Expression,Value};
|
use crate::syntax::ast::{Program,Statement,Expression,Value};
|
||||||
use crate::syntax::tokens::Token;
|
use crate::syntax::tokens::Token;
|
||||||
use internment::ArcIntern;
|
use internment::ArcIntern;
|
||||||
|
|
||||||
|
// one cool thing about lalrpop: we can pass arguments. in this case, the
|
||||||
|
// file index of the file we're parsing. we combine this with the file offset
|
||||||
|
// that Logos gives us to make a [`crate::syntax::Location`].
|
||||||
grammar(file_idx: usize);
|
grammar(file_idx: usize);
|
||||||
|
|
||||||
|
// this is a slightly odd way to describe this, but: consider this section
|
||||||
|
// as describing the stuff that is external to the lalrpop grammar that it
|
||||||
|
// needs to know to do its job.
|
||||||
extern {
|
extern {
|
||||||
type Location = usize;
|
type Location = usize; // Logos, our lexer, implements locations as
|
||||||
|
// offsets from the start of the file.
|
||||||
type Error = LexerError;
|
type Error = LexerError;
|
||||||
|
|
||||||
|
// here we redeclare all of the tokens.
|
||||||
enum Token {
|
enum Token {
|
||||||
"=" => Token::Equals,
|
"=" => Token::Equals,
|
||||||
";" => Token::Semi,
|
";" => Token::Semi,
|
||||||
@@ -22,57 +40,123 @@ extern {
|
|||||||
"*" => Token::Operator('*'),
|
"*" => Token::Operator('*'),
|
||||||
"/" => Token::Operator('/'),
|
"/" => Token::Operator('/'),
|
||||||
|
|
||||||
|
// the previous items just match their tokens, and if you try
|
||||||
|
// to name and use "their value", you get their source location.
|
||||||
|
// For these, we want "their value" to be their actual contents,
|
||||||
|
// which is why we put their types in angle brackets.
|
||||||
"<num>" => Token::Number((<Option<u8>>,<i64>)),
|
"<num>" => Token::Number((<Option<u8>>,<i64>)),
|
||||||
"<var>" => Token::Variable(<ArcIntern<String>>),
|
"<var>" => Token::Variable(<ArcIntern<String>>),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub Program: Program = {
|
pub Program: Program = {
|
||||||
|
// a program is just a set of statements
|
||||||
<stmts:Statements> => Program {
|
<stmts:Statements> => Program {
|
||||||
statements: stmts
|
statements: stmts
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Statements: Vec<Statement> = {
|
Statements: Vec<Statement> = {
|
||||||
|
// a list of statements is either a list of statements followed by another
|
||||||
|
// statement (note, here, that you can name the result of a sub-parse
|
||||||
|
// using <name: subrule>) ...
|
||||||
<mut stmts:Statements> <stmt:Statement> => {
|
<mut stmts:Statements> <stmt:Statement> => {
|
||||||
stmts.push(stmt);
|
stmts.push(stmt);
|
||||||
stmts
|
stmts
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// ... or it's nothing. This may feel like an awkward way to define
|
||||||
|
// lists of things -- and it is a bit awkward -- but there are actual
|
||||||
|
// technical reasons that you want to (a) use recursion to define
|
||||||
|
// these, and (b) use *left* recursion, specifically. That's why, in
|
||||||
|
// this file, all of the recursive cases are to the left, like they
|
||||||
|
// are above.
|
||||||
|
//
|
||||||
|
// the details of why left recursion is better are actually pretty
|
||||||
|
// fiddly and in the weeds, and if you're interested you should look
|
||||||
|
// up LALR parsers versus LL parsers; both their differences and how
|
||||||
|
// they're constructed, as they're kind of neat.
|
||||||
|
//
|
||||||
|
// but if you're just writing grammars with lalrpop, then you should
|
||||||
|
// just remember that you should always use left recursion, and be
|
||||||
|
// done with it.
|
||||||
=> {
|
=> {
|
||||||
Vec::new()
|
Vec::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub Statement: Statement = {
|
pub Statement: Statement = {
|
||||||
|
// A statement can be a variable binding. Note, here, that we use this
|
||||||
|
// funny @L thing to get the source location before the variable, so that
|
||||||
|
// we can say that this statement spans across everything.
|
||||||
<l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e),
|
<l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e),
|
||||||
|
|
||||||
|
// Alternatively, a statement can just be a print statement.
|
||||||
"print" <l:@L> <v:"<var>"> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()),
|
"print" <l:@L> <v:"<var>"> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Expressions! Expressions are a little fiddly, because we're going to
|
||||||
|
// use a little bit of a trick to make sure that we get operator precedence
|
||||||
|
// right. The trick works by creating a top-level `Expression` grammar entry
|
||||||
|
// that just points to the thing with the *weakest* precedence. In this case,
|
||||||
|
// we have addition, subtraction, multiplication, and division, so addition
|
||||||
|
// and subtraction have the weakest precedence.
|
||||||
|
//
|
||||||
|
// Then, as we go down the precedence tree, each item will recurse (left!)
|
||||||
|
// to other items at the same precedence level. The right hand operator, for
|
||||||
|
// binary operators (which is all of ours, at the moment) will then be one
|
||||||
|
// level stronger precedence. In addition, we'll let people just fall through
|
||||||
|
// to the next level; so if there isn't an addition or subtraction, we'll just
|
||||||
|
// fall through to the multiplication/division case.
|
||||||
|
//
|
||||||
|
// Finally, at the bottom, we'll have the core expressions (like constants,
|
||||||
|
// variables, etc.) as well as a parenthesized version of `Expression`, which
|
||||||
|
// gets us right up top again.
|
||||||
|
//
|
||||||
|
// Understanding why this works to solve all your operator precedence problems
|
||||||
|
// is a little hard to give an easy intuition for, but for myself it helped
|
||||||
|
// to run through a few examples. Consider thinking about how you want to
|
||||||
|
// parse something like "1 + 2 * 3", for example, versus "1 + 2 + 3" or
|
||||||
|
// "1 * 2 + 3", and hopefully that'll help.
|
||||||
Expression: Expression = {
|
Expression: Expression = {
|
||||||
AdditiveExpression,
|
AdditiveExpression,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we group addition and subtraction under the heading "additive"
|
||||||
AdditiveExpression: Expression = {
|
AdditiveExpression: Expression = {
|
||||||
<e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]),
|
<e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]),
|
||||||
<e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]),
|
<e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]),
|
||||||
MultiplicativeExpression,
|
MultiplicativeExpression,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// similarly, we group multiplication and division under "multiplicative"
|
||||||
MultiplicativeExpression: Expression = {
|
MultiplicativeExpression: Expression = {
|
||||||
<e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]),
|
<e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]),
|
||||||
<e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]),
|
<e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]),
|
||||||
AtomicExpression,
|
AtomicExpression,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// finally, we describe our lowest-level expressions as "atomic", because
|
||||||
|
// they cannot be further divided into parts
|
||||||
AtomicExpression: Expression = {
|
AtomicExpression: Expression = {
|
||||||
|
// just a variable reference
|
||||||
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
|
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
|
||||||
|
// just a number
|
||||||
<l:@L> <n:"<num>"> => {
|
<l:@L> <n:"<num>"> => {
|
||||||
let val = Value::Number(n.0, n.1);
|
let val = Value::Number(n.0, n.1);
|
||||||
Expression::Value(Location::new(file_idx, l), val)
|
Expression::Value(Location::new(file_idx, l), val)
|
||||||
},
|
},
|
||||||
|
// a tricky case: also just a number, but using a negative sign. an
|
||||||
|
// alternative way to do this -- and we may do this eventually -- is
|
||||||
|
// to implement a unary negation expression. this has the odd effect
|
||||||
|
// that the user never actually writes down a negative number; they just
|
||||||
|
// write positive numbers which are immediately sent to a negation
|
||||||
|
// primitive!
|
||||||
<l:@L> "-" <n:"<num>"> => {
|
<l:@L> "-" <n:"<num>"> => {
|
||||||
let val = Value::Number(n.0, -n.1);
|
let val = Value::Number(n.0, -n.1);
|
||||||
Expression::Value(Location::new(file_idx, l), val)
|
Expression::Value(Location::new(file_idx, l), val)
|
||||||
},
|
},
|
||||||
|
// finally, let people parenthesize expressions and get back to a
|
||||||
|
// lower precedence
|
||||||
"(" <e:Expression> ")" => e,
|
"(" <e:Expression> ")" => e,
|
||||||
}
|
}
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
use crate::syntax::ast::{Expression, Program, Statement};
|
|
||||||
|
|
||||||
impl Program {
|
|
||||||
pub fn simplify(mut self) -> Self {
|
|
||||||
let mut new_statements = Vec::new();
|
|
||||||
let mut gensym_index = 1;
|
|
||||||
|
|
||||||
for stmt in self.statements.drain(..) {
|
|
||||||
new_statements.append(&mut stmt.simplify(&mut gensym_index));
|
|
||||||
}
|
|
||||||
|
|
||||||
self.statements = new_statements;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Statement {
|
|
||||||
pub fn simplify(self, gensym_index: &mut usize) -> Vec<Statement> {
|
|
||||||
let mut new_statements = vec![];
|
|
||||||
|
|
||||||
match self {
|
|
||||||
Statement::Print(_, _) => new_statements.push(self),
|
|
||||||
Statement::Binding(_, _, Expression::Reference(_, _)) => new_statements.push(self),
|
|
||||||
Statement::Binding(_, _, Expression::Value(_, _)) => new_statements.push(self),
|
|
||||||
Statement::Binding(loc, name, value) => {
|
|
||||||
let (mut prereqs, new_value) = value.rebind(&name, gensym_index);
|
|
||||||
new_statements.append(&mut prereqs);
|
|
||||||
new_statements.push(Statement::Binding(loc, name, new_value))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
new_statements
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Expression {
|
|
||||||
fn rebind(self, base_name: &str, gensym_index: &mut usize) -> (Vec<Statement>, Expression) {
|
|
||||||
match self {
|
|
||||||
Expression::Value(_, _) => (vec![], self),
|
|
||||||
Expression::Reference(_, _) => (vec![], self),
|
|
||||||
Expression::Primitive(loc, prim, mut expressions) => {
|
|
||||||
let mut prereqs = Vec::new();
|
|
||||||
let mut new_exprs = Vec::new();
|
|
||||||
|
|
||||||
for expr in expressions.drain(..) {
|
|
||||||
let (mut cur_prereqs, arg) = expr.rebind(base_name, gensym_index);
|
|
||||||
prereqs.append(&mut cur_prereqs);
|
|
||||||
new_exprs.push(arg);
|
|
||||||
}
|
|
||||||
|
|
||||||
let new_name = format!("<{}:{}>", base_name, *gensym_index);
|
|
||||||
*gensym_index += 1;
|
|
||||||
prereqs.push(Statement::Binding(
|
|
||||||
loc.clone(),
|
|
||||||
new_name.clone(),
|
|
||||||
Expression::Primitive(loc.clone(), prim, new_exprs),
|
|
||||||
));
|
|
||||||
|
|
||||||
(prereqs, Expression::Reference(loc, new_name))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -4,8 +4,30 @@ use std::fmt;
|
|||||||
use std::num::ParseIntError;
|
use std::num::ParseIntError;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
|
/// A single token of the input stream; used to help the parsing go down
|
||||||
|
/// more easily.
|
||||||
|
///
|
||||||
|
/// The key way to generate this structure is via the [`Logos`] trait.
|
||||||
|
/// See the [`logos`] documentation for more information; we use the
|
||||||
|
/// [`Token::lexer`] function internally.
|
||||||
|
///
|
||||||
|
/// The first step in the compilation process is turning the raw string
|
||||||
|
/// data (in UTF-8, which is its own joy) into a sequence of more sensible
|
||||||
|
/// tokens. Here, for example, we turn "x=5" into three tokens: a
|
||||||
|
/// [`Token::Variable`] for "x", a [`Token::Equals`] for the "=", and
|
||||||
|
/// then a [`Token::Number`] for the "5". Later on, we'll worry about
|
||||||
|
/// making sense of those three tokens.
|
||||||
|
///
|
||||||
|
/// For now, our list of tokens is relatively straightforward. We'll
|
||||||
|
/// need/want to extend these later.
|
||||||
|
///
|
||||||
|
/// The [`std::fmt::Display`] implementation for [`Token`] should
|
||||||
|
/// round-trip; if you lex a string generated with the [`std::fmt::Display`]
|
||||||
|
/// trait, you should get back the exact same token.
|
||||||
#[derive(Logos, Clone, Debug, PartialEq, Eq)]
|
#[derive(Logos, Clone, Debug, PartialEq, Eq)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
|
// Our first set of tokens are simple characters that we're
|
||||||
|
// going to use to structure NGR programs.
|
||||||
#[token("=")]
|
#[token("=")]
|
||||||
Equals,
|
Equals,
|
||||||
|
|
||||||
@@ -18,12 +40,20 @@ pub enum Token {
|
|||||||
#[token(")")]
|
#[token(")")]
|
||||||
RightParen,
|
RightParen,
|
||||||
|
|
||||||
|
// Next we take care of any reserved words; I always like to put
|
||||||
|
// these before we start recognizing more complicated regular
|
||||||
|
// expressions. I don't think it matters, but it works for me.
|
||||||
#[token("print")]
|
#[token("print")]
|
||||||
Print,
|
Print,
|
||||||
|
|
||||||
|
// Next are the operators for NGR. We only have 4, now, but
|
||||||
|
// we might extend these later, or even make them user-definable!
|
||||||
#[regex(r"[+\-*/]", |v| v.slice().chars().next())]
|
#[regex(r"[+\-*/]", |v| v.slice().chars().next())]
|
||||||
Operator(char),
|
Operator(char),
|
||||||
|
|
||||||
|
/// Numbers capture both the value we read from the input,
|
||||||
|
/// converted to an `i64`, as well as the base the user used
|
||||||
|
/// to write the number, if they did so.
|
||||||
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
|
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
|
||||||
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
|
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
|
||||||
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
|
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
|
||||||
@@ -31,12 +61,23 @@ pub enum Token {
|
|||||||
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
||||||
Number((Option<u8>, i64)),
|
Number((Option<u8>, i64)),
|
||||||
|
|
||||||
|
// Variables; this is a very standard, simple set of characters
|
||||||
|
// for variables, but feel free to experiment with more complicated
|
||||||
|
// things. I chose to force variables to start with a lower case
|
||||||
|
// letter, too.
|
||||||
#[regex(r"[a-z][a-zA-Z0-9_]*", |v| ArcIntern::new(v.slice().to_string()))]
|
#[regex(r"[a-z][a-zA-Z0-9_]*", |v| ArcIntern::new(v.slice().to_string()))]
|
||||||
Variable(ArcIntern<String>),
|
Variable(ArcIntern<String>),
|
||||||
|
|
||||||
|
// the next token will be an error token
|
||||||
#[error]
|
#[error]
|
||||||
|
// we're actually just going to skip whitespace, though
|
||||||
#[regex(r"[ \t\r\n\f]+", logos::skip)]
|
#[regex(r"[ \t\r\n\f]+", logos::skip)]
|
||||||
|
// this is an extremely simple version of comments, just line
|
||||||
|
// comments. More complicated /* */ comments can be harder to
|
||||||
|
// implement, and didn't seem worth it at the time.
|
||||||
#[regex(r"//.*", logos::skip)]
|
#[regex(r"//.*", logos::skip)]
|
||||||
|
/// This token represents that some core error happened in lexing;
|
||||||
|
/// possibly that something didn't match anything at all.
|
||||||
Error,
|
Error,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,19 +104,28 @@ impl fmt::Display for Token {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A sudden and unexpected error in the lexer.
|
||||||
#[derive(Debug, Error, PartialEq, Eq)]
|
#[derive(Debug, Error, PartialEq, Eq)]
|
||||||
pub enum LexerError {
|
pub enum LexerError {
|
||||||
|
/// The `usize` here is the offset at which we ran into the problem, given
|
||||||
|
/// from the start of the file.
|
||||||
#[error("Failed lexing at {0}")]
|
#[error("Failed lexing at {0}")]
|
||||||
LexFailure(usize),
|
LexFailure(usize),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
impl Token {
|
impl Token {
|
||||||
|
/// Create a variable token with the given name. Very handy for
|
||||||
|
/// testing.
|
||||||
pub(crate) fn var(s: &str) -> Token {
|
pub(crate) fn var(s: &str) -> Token {
|
||||||
Token::Variable(ArcIntern::new(s.to_string()))
|
Token::Variable(ArcIntern::new(s.to_string()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a number in the given base, return a pair of the base and the
|
||||||
|
/// parsed number. This is just a helper used for all of the number
|
||||||
|
/// regular expression cases, which kicks off to the obvious Rust
|
||||||
|
/// standard library function.
|
||||||
fn parse_number(
|
fn parse_number(
|
||||||
base: Option<u8>,
|
base: Option<u8>,
|
||||||
value: &Lexer<Token>,
|
value: &Lexer<Token>,
|
||||||
|
|||||||
@@ -2,6 +2,13 @@ use crate::syntax::{Expression, Location, Program, Statement};
|
|||||||
use codespan_reporting::diagnostic::Diagnostic;
|
use codespan_reporting::diagnostic::Diagnostic;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// An error we found while validating the input program.
|
||||||
|
///
|
||||||
|
/// These errors indicate that we should stop trying to compile
|
||||||
|
/// the program, because it's just fundamentally broken in a way
|
||||||
|
/// that we're not going to be able to work through. As with most
|
||||||
|
/// of these errors, we recommend converting this to a [`Diagnostic`]
|
||||||
|
/// and using [`codespan_reporting`] to present them to the user.
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
UnboundVariable(Location, String),
|
UnboundVariable(Location, String),
|
||||||
}
|
}
|
||||||
@@ -16,6 +23,13 @@ impl From<Error> for Diagnostic<usize> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A problem we found validating the input that isn't critical.
|
||||||
|
///
|
||||||
|
/// These are things that the user might want to do something about,
|
||||||
|
/// but we can keep going without it being a problem. As with most of
|
||||||
|
/// these things, if you want to present this information to the user,
|
||||||
|
/// the best way to do so is via [`From`] and [`Diagnostic`], and then
|
||||||
|
/// interactions via [`codespan_reporting`].
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub enum Warning {
|
pub enum Warning {
|
||||||
ShadowedVariable(Location, Location, String),
|
ShadowedVariable(Location, Location, String),
|
||||||
@@ -37,6 +51,11 @@ impl From<Warning> for Diagnostic<usize> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Program {
|
impl Program {
|
||||||
|
/// Validate that the program makes semantic sense, not just syntactic sense.
|
||||||
|
///
|
||||||
|
/// This checks for things like references to variables that don't exist, for
|
||||||
|
/// example, and generates warnings for things that are inadvisable but not
|
||||||
|
/// actually a problem.
|
||||||
pub fn validate(&self) -> (Vec<Error>, Vec<Warning>) {
|
pub fn validate(&self) -> (Vec<Error>, Vec<Warning>) {
|
||||||
let mut errors = vec![];
|
let mut errors = vec![];
|
||||||
let mut warnings = vec![];
|
let mut warnings = vec![];
|
||||||
@@ -53,6 +72,15 @@ impl Program {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Statement {
|
impl Statement {
|
||||||
|
/// Validate that the statement makes semantic sense, not just syntactic sense.
|
||||||
|
///
|
||||||
|
/// This checks for things like references to variables that don't exist, for
|
||||||
|
/// example, and generates warnings for things that are inadvisable but not
|
||||||
|
/// actually a problem. Since statements appear in a broader context, you'll
|
||||||
|
/// need to provide the set of variables that are bound where this statement
|
||||||
|
/// occurs. We use a `HashMap` to map these bound variables to the locations
|
||||||
|
/// where they're bound, because these locations are handy when generating errors
|
||||||
|
/// and warnings.
|
||||||
pub fn validate(
|
pub fn validate(
|
||||||
&self,
|
&self,
|
||||||
bound_variables: &mut HashMap<String, Location>,
|
bound_variables: &mut HashMap<String, Location>,
|
||||||
|
|||||||
Reference in New Issue
Block a user