diff --git a/src/backend/error.rs b/src/backend/error.rs index 3eb3118..caa9e59 100644 --- a/src/backend/error.rs +++ b/src/backend/error.rs @@ -2,8 +2,27 @@ use crate::backend::runtime::RuntimeFunctionError; use codespan_reporting::diagnostic::Diagnostic; use cranelift_codegen::{isa::LookupError, settings::SetError, CodegenError}; use cranelift_module::ModuleError; +use internment::ArcIntern; use thiserror::Error; +/// An error in the translation to a backend (either the JIT or the static compiler). +/// +/// In general, this is just a nice summary error type for a bunch of downstream +/// errors; the exceptions are internal errors from builtin functions or variable +/// lookups. +/// +/// Unlike some other errors in the system, the translation to a `Diagnostic` does +/// not necessarily provide a whole lot of value, because we have lost most of the +/// source information by the time we're generating these errors. That being said, +/// people who want to provide nicer error messages might consider using the +/// translation through `Diagnostic` anyways, just in case we add more information +/// in the future. +/// +/// Finally, the `PartialEq` for this type is a bit fuzzy. In some cases, it +/// ensures that the errors match exactly. In other cases, though, it just checks to +/// see if the two errors are of the same class; e.g., it will return true if both +/// errors are `BackendError::CodegenError`, regardless of what the specific +/// `CodegenError` is. 
#[derive(Debug, Error)] pub enum BackendError { #[error("Cranelift module error: {0}")] @@ -11,7 +30,7 @@ pub enum BackendError { #[error("Builtin function error: {0}")] BuiltinError(#[from] RuntimeFunctionError), #[error("Internal variable lookup error")] - VariableLookupFailure, + VariableLookupFailure(ArcIntern), #[error(transparent)] CodegenError(#[from] CodegenError), #[error(transparent)] @@ -31,9 +50,8 @@ impl From for Diagnostic { BackendError::BuiltinError(me) => { Diagnostic::error().with_message(format!("Internal runtime function error: {}", me)) } - BackendError::VariableLookupFailure => { - Diagnostic::error().with_message("Internal variable lookup error!") - } + BackendError::VariableLookupFailure(x) => Diagnostic::error() + .with_message(format!("Internal variable lookup error for {}", x)), BackendError::CodegenError(me) => { Diagnostic::error().with_message(format!("Internal codegen error: {}", me)) } @@ -58,8 +76,12 @@ impl PartialEq for BackendError { _ => false, }, + // because the underlying `CodegenError` doesn't implement `PartialEq`, + // we just check that they're both `CodegenError`s. BackendError::CodegenError(_) => matches!(other, BackendError::CodegenError(_)), + // because the underlying `ModuleError` doesn't implement `PartialEq`, + // we just check that they're both `Cranelift`s. 
BackendError::Cranelift(_) => matches!(other, BackendError::Cranelift(_)), BackendError::LookupError(a) => match other { @@ -72,7 +94,10 @@ impl PartialEq for BackendError { _ => false, }, - BackendError::VariableLookupFailure => other == &BackendError::VariableLookupFailure, + BackendError::VariableLookupFailure(a) => match other { + BackendError::VariableLookupFailure(b) => a == b, + _ => false, + }, BackendError::Write(a) => match other { BackendError::Write(b) => a == b, diff --git a/src/backend/eval.rs b/src/backend/eval.rs index a57c625..6df2f5e 100644 --- a/src/backend/eval.rs +++ b/src/backend/eval.rs @@ -8,6 +8,19 @@ use cranelift_object::ObjectModule; use target_lexicon::Triple; impl Backend { + /// Evaluate the given IR, returning the output it prints. + /// + /// This builds and executes the program using the JIT backend, using a fresh JIT runtime + /// that should be independent of any other runtimes being executed. As such, it should be + /// impossible for a program being executed by this function to interact with another, parallel + /// execution of the function. If you actually want them to interact, you'll need to combine + /// them into the same `Program` before execution. + /// + /// One important note: The runtime used by this function does not currently implement + /// overflow/underflow erroring the same way that other evaluation functions within this + /// library do. So, if you're validating equivalence between them, you'll want to weed + /// out examples that overflow/underflow before checking equivalence. (This is the behavior + /// of the built-in test systems.) pub fn eval(program: Program) -> Result { let mut jitter = Backend::jit(Some(String::new()))?; let function_id = jitter.compile_function("test", program)?; @@ -20,6 +33,20 @@ impl Backend { } impl Backend { + /// Evaluate the given IR, returning the output it prints. 
+ /// + /// This builds the program as a standalone object in a temporary directory, and then links + /// and runs it using the provided runtime system (see `CARGO_MANIFEST_DIR/runtime/`). To + /// do so, it assumes that there is a version of `clang` available in the current PATH. + /// + /// This routine is regularly tested under Windows, Mac, and Linux, and should work across + /// other platforms that support `clang`. + /// + /// One important note: The runtime used by this function does not currently implement + /// overflow/underflow erroring the same way that other evaluation functions within this + /// library do. So, if you're validating equivalence between them, you'll want to weed + /// out examples that overflow/underflow before checking equivalence. (This is the behavior + /// of the built-in test systems.) pub fn eval(program: Program) -> Result { //use pretty::{Arena, Pretty}; //let allocator = Arena::<()>::new(); @@ -52,6 +79,17 @@ impl Backend { } } + /// Link the generated object into an executable. + /// + /// Currently, our runtime system is a single file, and ends up being the function + /// that includes `main`. (It then calls the `gogogo` function which serves as the + /// entry point for our compiled code.) This function thus just uses `clang` to + /// compile the C file with the generated object file to produce the executable. + /// Conveniently, `clang` also sets execute permissions under unix-like file systems. + /// + /// This function assumes that this compilation and linking should run without any + /// output, so changes to the RTS should make 100% sure that they do not generate + /// any compiler warnings. fn link(object_file: &Path, executable_path: &Path) -> Result<(), EvalError> { use std::path::PathBuf; @@ -77,12 +115,17 @@ impl Backend { } proptest::proptest! { + // This is the obvious test to make sure that our static compilation path works + // without error, assuming any possible input ... 
well, any possible input that + // doesn't involve overflow or underflow. #[test] - fn file_backend_works(program: Program) { + fn static_backend(program: Program) { use crate::eval::PrimOpError; let basic_result = program.eval(); + // windows `printf` is going to terminate lines with "\r\n", so we need to adjust + // our test result here. #[cfg(target_family="windows")] let basic_result = basic_result.map(|x| x.replace('\n', "\r\n")); @@ -92,8 +135,11 @@ proptest::proptest! { } } + // This is the obvious test to make sure that our JIT compilation path works + // without error, assuming any possible input ... well, any possible input that + // doesn't involve overflow or underflow. #[test] - fn jit_backend_works(program: Program) { + fn jit_backend(program: Program) { use crate::eval::PrimOpError; let basic_result = program.eval(); diff --git a/src/backend/into_crane.rs b/src/backend/into_crane.rs index 6ceff02..5965a9f 100644 --- a/src/backend/into_crane.rs +++ b/src/backend/into_crane.rs @@ -8,15 +8,31 @@ use cranelift_codegen::ir::{ use cranelift_codegen::isa::CallConv; use cranelift_codegen::Context; use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable}; -use cranelift_module::{FuncId, Linkage, Module, ModuleError}; +use cranelift_module::{FuncId, Linkage, Module}; use internment::ArcIntern; use crate::backend::error::BackendError; use crate::backend::Backend; +/// When we're compiling, we might need to reference some of the strings built into +/// the source code; to do so, we need a `GlobalValue`. Perhaps unexpectedly, given +/// the name, `GlobalValue`s are specific to a single function we're compiling, so +/// we end up computing this table for every function. +/// +/// This is just a handy type alias to avoid a lot of confusion in the functions. type StringTable = HashMap, GlobalValue>; impl Backend { + /// Compile the given `Program` into a function with the given name. 
+ /// + /// At some point, the use of `Program` is going to change; however, for the + /// moment, we have no notion of a function in our language so the whole input + /// is converted into a single output function. The type of the generated + /// function is, essentially, `fn() -> ()`: it takes no arguments and returns + /// no value. + /// + /// The function provided can then be either written to a file (if using a + /// static Cranelift backend) or executed directly (if using the Cranelift JIT). pub fn compile_function( &mut self, function_name: &str, @@ -28,21 +44,47 @@ impl Backend { call_conv: CallConv::SystemV, }; + // this generates the handle for the function that we'll eventually want to + // return to the user. For now, we declare all functions defined by this + // function as public/global/exported, although we may want to reconsider + // this decision later. let func_id = self.module .declare_function(function_name, Linkage::Export, &basic_signature)?; - let mut ctx = Context::new(); - ctx.func = - Function::with_name_signature(UserFuncName::user(0, func_id.as_u32()), basic_signature); + // Next we have to generate the compilation context for the rest of this + // function. Currently, we generate a fresh context for every function. + // Since we're only generating one function per `Program`, this makes + // complete sense. However, in the future, we may want to revisit this + // decision. + let mut ctx = Context::new(); + let user_func_name = UserFuncName::user(0, func_id.as_u32()); + ctx.func = Function::with_name_signature(user_func_name, basic_signature); + + // We generate a table of every string that we use in the program, here. + // Cranelift is going to require us to have this in a particular structure + // (`GlobalValue`) so that we can reference them later, and it's going to + // be tricky to generate those on the fly. So we just generate the set we + // need here, and then have it around in the table for later. 
let string_table = self.build_string_table(&mut ctx.func, &program)?; - let mut variable_table = HashMap::new(); - let mut next_var_num = 1; + + // In the future, we might want to see what runtime functions the function + // we were given uses, and then only include those functions that we care + // about. Presumably, we'd use some sort of lookup table like we do for + // strings. But for now, we only have one runtime function, and we're pretty + // sure we're always going to use it, so we just declare it (and reference + // it) directly. let print_func_ref = self.runtime_functions.include_runtime_function( "print", &mut self.module, &mut ctx.func, )?; + + // In the case of the JIT, there may be symbols we've already defined outside + // the context of this particular `Program`, which we might want to reference. + // Just like with strings, generating the `GlobalValue`s we need can potentially + // be a little tricky to do on the fly, so we generate the complete list right + // here and then use it later. let pre_defined_symbols: HashMap = self .defined_symbols .iter() @@ -52,67 +94,88 @@ impl Backend { }) .collect(); + // The last table we're going to need is our local variable table, to store + // variables used in this `Program` but not used outside of it. For whatever + // reason, Cranelift requires us to generate unique indexes for each of our + // variables; we just use a simple incrementing counter for that. + let mut variable_table = HashMap::new(); + let mut next_var_num = 1; + + // Finally (!), we generate the function builder that we're going to use to + // make this function! let mut fctx = FunctionBuilderContext::new(); let mut builder = FunctionBuilder::new(&mut ctx.func, &mut fctx); + + // Make the initial block to put instructions in. Later, when we have control + // flow, we might add more blocks after this one. But, for now, we only have + // the one block. 
let main_block = builder.create_block(); builder.switch_to_block(main_block); + // Compiling a function is just compiling each of the statements in order. + // At the moment, we do the pattern match for statements here, and then + // directly compile the statements. If/when we add more statement forms, + // this is likely to become more cumbersome, and we'll want to separate + // these off. But for now, given the amount of tables we keep around to track + // state, it's easier to just include them. for stmt in program.statements.drain(..) { match stmt { + // Print statements are fairly easy to compile: we just lookup the + // output buffer, the address of the string to print, and the value + // of whatever variable we're printing. Then we just call print. Statement::Print(ann, var) => { + // Get the output buffer (or null) from our general compilation context. let buffer_ptr = self.output_buffer_ptr(); let buffer_ptr = builder.ins().iconst(types::I64, buffer_ptr as i64); + + // Get a reference to the string we want to print. let local_name_ref = string_table.get(&var).unwrap(); let name_ptr = builder.ins().symbol_value(types::I64, *local_name_ref); - let val = ValueOrRef::Ref(ann, var).into_cranelift( + + // Look up the value for the variable. Because this might be a + // global variable (and that requires special logic), we just turn + // this into an `Expression` and re-use the logic in that implementation. + let val = Expression::Reference(ann, var).into_crane( &mut builder, &variable_table, &pre_defined_symbols, )?; + + // Finally, we can generate the call to print. 
builder .ins() .call(print_func_ref, &[buffer_ptr, name_ptr, val]); } + // Variable binding is a little more complicated: the target may be a global or a local variable. Statement::Binding(_, var_name, value) => { - let val = match value { - Expression::Value(_, Value::Number(_, v)) => { - builder.ins().iconst(types::I64, v) - } - - Expression::Reference(_, name) => { - let value_var_num = variable_table.get(&name).unwrap(); - builder.use_var(Variable::new(*value_var_num)) - } - - Expression::Primitive(_, prim, mut vals) => { - let right = vals.pop().unwrap().into_cranelift( - &mut builder, - &variable_table, - &pre_defined_symbols, - )?; - let left = vals.pop().unwrap().into_cranelift( - &mut builder, - &variable_table, - &pre_defined_symbols, - )?; - - match prim { - Primitive::Plus => builder.ins().iadd(left, right), - Primitive::Minus => builder.ins().isub(left, right), - Primitive::Times => builder.ins().imul(left, right), - Primitive::Divide => builder.ins().sdiv(left, right), - } - } - }; + // Kick off to the `Expression` implementation to see what value we're going + // to bind to this variable. + let val = + value.into_crane(&mut builder, &variable_table, &pre_defined_symbols)?; + // Now the question is: is this a local variable, or a global one? if let Some(global_id) = pre_defined_symbols.get(var_name.as_str()) { + // It's a global variable! In this case, we assume that someone has already + // dedicated some space in memory to store this value. We look this location + // up, and then tell Cranelift to store the value there. let val_ptr = builder.ins().symbol_value(types::I64, *global_id); builder.ins().store(MemFlags::new(), val, val_ptr, 0); } else { + // It's a local variable! In this case, we need to allocate a new Cranelift + // `Variable` for this variable, which we do using our `next_var_num` counter. + // (While we're doing this, we also increment `next_var_num`, so that we get + // a fresh `Variable` next time. 
This is one of those very narrow cases in which + // I wish Rust had an increment expression.) let var = Variable::new(next_var_num); - variable_table.insert(var_name, next_var_num); next_var_num += 1; + + // We can add the variable directly to our local variable map; it's `Copy`. + variable_table.insert(var_name, var); + + // Now we tell Cranelift about our new variable, which has type I64 because + // everything we have at this point is of type I64. Once it's declared, we + // define it as having the value we computed above. builder.declare_var(var, types::I64); builder.def_var(var, val); } @@ -120,15 +183,30 @@ impl Backend { } } + // Now that we're done, inject a return instruction (one with no actual value; basically + // the equivalent of Rust's `return;`). We then seal the block (which lets Cranelift + // know that the block is done), and then finalize the function (which lets Cranelift + // know we're done with the function). builder.ins().return_(&[]); builder.seal_block(main_block); builder.finalize(); + // This is a little odd. We want to tell the rest of Cranelift about this function, + // so we register it using the function ID and our builder context. However, the + // result of this function isn't actually super helpful. So we ignore it, unless + // it's an error. let _ = self.module.define_function(func_id, &mut ctx)?; + // done! Ok(func_id) } + // Build the string table for use in referencing strings later. + // + // This function is slightly smart, in that it only puts strings in the table that + // are used by the `Program`. (Thanks to `Program::strings()`!) If the strings have + // been declared globally, via `Backend::define_string()`, we will re-use that data. + // Otherwise, this will define the string for you. 
fn build_string_table( &mut self, func: &mut Function, @@ -149,30 +227,73 @@ impl Backend { } } -impl ValueOrRef { - fn into_cranelift( +impl Expression { + fn into_crane( self, builder: &mut FunctionBuilder, - local_variables: &HashMap, usize>, + local_variables: &HashMap, Variable>, global_variables: &HashMap, - ) -> Result { + ) -> Result { match self { - ValueOrRef::Value(_, value) => match value { - Value::Number(_base, numval) => Ok(builder.ins().iconst(types::I64, numval)), - }, + // Values are pretty straightforward to compile, mostly because we only + // have one type of variable, and it's an integer type. + Expression::Value(_, Value::Number(_, v)) => Ok(builder.ins().iconst(types::I64, v)), - ValueOrRef::Ref(_, name) => { - if let Some(local_num) = local_variables.get(&name) { - return Ok(builder.use_var(Variable::new(*local_num))); + Expression::Reference(_, name) => { + // first we see if this is a local variable (which is nicer, from an + // optimization point of view.) + if let Some(local_var) = local_variables.get(&name) { + return Ok(builder.use_var(*local_var)); } - if let Some(global_id) = global_variables.get(name.as_str()) { - let val_ptr = builder.ins().symbol_value(types::I64, *global_id); + // then we check to see if this is a global reference, which requires us to + // first lookup where the value is stored, and then load it. + if let Some(global_var) = global_variables.get(name.as_ref()) { + let val_ptr = builder.ins().symbol_value(types::I64, *global_var); return Ok(builder.ins().load(types::I64, MemFlags::new(), val_ptr, 0)); } - Err(ModuleError::Undeclared(name.to_string())) + // this should never happen, because we should have made sure that there are + // no unbound variables a long time before this. but still ... + Err(BackendError::VariableLookupFailure(name)) + } + + Expression::Primitive(_, prim, mut vals) => { + // we're going to use `pop`, so we're going to pull and compile the right value ... 
+ let right = + vals.pop() + .unwrap() + .into_crane(builder, local_variables, global_variables)?; + // ... and then the left. + let left = + vals.pop() + .unwrap() + .into_crane(builder, local_variables, global_variables)?; + + // then we just need to tell Cranelift how to do each of our primitives! Much + // like Statements, above, we probably want to eventually shuffle this off into + // a separate function (maybe something off `Primitive`), but for now it's simple + // enough that we just do the `match` here. + match prim { + Primitive::Plus => Ok(builder.ins().iadd(left, right)), + Primitive::Minus => Ok(builder.ins().isub(left, right)), + Primitive::Times => Ok(builder.ins().imul(left, right)), + Primitive::Divide => Ok(builder.ins().sdiv(left, right)), + } } } } } + +// Just to avoid duplication, this just leverages the `From` trait implementation +// for `ValueOrRef` to compile this via the `Expression` logic, above. +impl ValueOrRef { + fn into_crane( + self, + builder: &mut FunctionBuilder, + local_variables: &HashMap, Variable>, + global_variables: &HashMap, + ) -> Result { + Expression::from(self).into_crane(builder, local_variables, global_variables) + } +} diff --git a/src/backend/runtime.rs b/src/backend/runtime.rs index 1338a73..a03acf7 100644 --- a/src/backend/runtime.rs +++ b/src/backend/runtime.rs @@ -8,9 +8,14 @@ use std::fmt::Write; use target_lexicon::Triple; use thiserror::Error; +/// An object for querying / using functions built into the runtime. +/// +/// Right now, this is quite a bit of boilerplate for very nebulous +/// value. However, as the number of built-in functions gets large, it's +/// nice to have a single point to register and query them, so here we +/// go. 
pub struct RuntimeFunctions { builtin_functions: HashMap, - _referenced_functions: Vec, } #[derive(Debug, Error, PartialEq)] @@ -19,25 +24,27 @@ pub enum RuntimeFunctionError { CannotFindRuntimeFunction(String), } -extern "C" fn runtime_print(output_buffer: *mut String, name: *const i8, value: i64) { - let cstr = unsafe { CStr::from_ptr(name) }; - let reconstituted = cstr.to_string_lossy(); - - if let Some(output_buffer) = unsafe { output_buffer.as_mut() } { - writeln!(output_buffer, "{} = {}i64", reconstituted, value).unwrap(); - } else { - println!("{} = {}", reconstituted, value); - } -} - impl RuntimeFunctions { + /// Generate a new runtime function table for the given platform, and + /// declare them within the provided Cranelift module. + /// + /// Note that this is very conservative: it assumes that your module + /// will want to use every runtime function. Unless the Cranelift object + /// builder is smart, this might inject a bunch of references (and thus + /// linker requirements) that aren't actually needed by your program. + /// + /// Then again, right now there's exactly one runtime function, so ... + /// not a big deal. pub fn new(platform: &Triple, module: &mut M) -> ModuleResult { let mut builtin_functions = HashMap::new(); - let _referenced_functions = Vec::new(); let string_param = AbiParam::new(types::I64); let int64_param = AbiParam::new(types::I64); + // declare print for Cranelift; it's something we're going to import + // into the current module (it's compiled separately), and takes two + // strings and an integer. (Which ... turn out to all be the same + // underlying type, which is weird but the way it is.) let print_id = module.declare_function( "print", Linkage::Import, @@ -47,14 +54,19 @@ impl RuntimeFunctions { call_conv: CallConv::triple_default(platform), }, )?; + + // Toss this function in our internal dictionary, as well. 
builtin_functions.insert("print".to_string(), print_id); - Ok(RuntimeFunctions { - builtin_functions, - _referenced_functions, - }) + Ok(RuntimeFunctions { builtin_functions }) } + /// Include the named runtime function into the current Function context. + /// + /// This is necessary for every runtime function reference within each + /// function. The returned `FuncRef` can be used in `call` invocations. + /// The only reason for this function to error is if you pass a name that + /// the runtime isn't familiar with. pub fn include_runtime_function( &self, name: &str, @@ -69,7 +81,30 @@ impl RuntimeFunctions { } } + /// Register live, local versions of the runtime functions into the JIT. + /// + /// Note that these implementations are *not* the same as the ones defined + /// in `CARGO_MANIFEST_DIR/runtime/`, for ... reasons. It might be a good + /// change, in the future, to find a way to unify these implementations into + /// one; both to reduce the chance that they deviate, and to reduce overall + /// maintenance burden. pub fn register_jit_implementations(builder: &mut JITBuilder) { builder.symbol("print", runtime_print as *const u8); } } + +// Print! This implementation is used in the JIT compiler, to actually print data. We +// use the `output_buffer` argument as an aid for testing; if it's non-NULL, it's a string +// we extend with the output, so that multiple JIT'd `Program`s can run concurrently +// without stomping over each other's output. If `output_buffer` is NULL, we just print +// to stdout. 
+extern "C" fn runtime_print(output_buffer: *mut String, name: *const i8, value: i64) { + let cstr = unsafe { CStr::from_ptr(name) }; + let reconstituted = cstr.to_string_lossy(); + + if let Some(output_buffer) = unsafe { output_buffer.as_mut() } { + writeln!(output_buffer, "{} = {}i64", reconstituted, value).unwrap(); + } else { + println!("{} = {}", reconstituted, value); + } +} diff --git a/src/ir/ast.rs b/src/ir/ast.rs index ad96e95..6e46538 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -163,6 +163,15 @@ where } } +impl From for Expression { + fn from(value: ValueOrRef) -> Self { + match value { + ValueOrRef::Value(loc, val) => Expression::Value(loc, val), + ValueOrRef::Ref(loc, var) => Expression::Reference(loc, var), + } + } +} + #[derive(Debug)] pub enum Value { Number(Option, i64),