📜 Add better documentation across the compiler. #3

Merged
acw merged 19 commits from acw/better-docs into develop 2023-05-13 12:34:48 -07:00
28 changed files with 1550 additions and 432 deletions

View File

@@ -19,7 +19,7 @@ cranelift-module = "0.94.0"
cranelift-native = "0.94.0"
cranelift-object = "0.94.0"
internment = { version = "0.7.0", default-features = false, features = ["arc"] }
lalrpop-util = "^0.19.7"
lalrpop-util = "^0.20.0"
lazy_static = "^1.4.0"
logos = "^0.12.0"
pretty = { version = "^0.11.2", features = ["termcolor"] }
@@ -30,4 +30,4 @@ tempfile = "^3.5.0"
thiserror = "^1.0.30"
[build-dependencies]
lalrpop = "^0.19.7"
lalrpop = "^0.20.0"

View File

@@ -1,3 +1,31 @@
//! # The compiler backend: generation of machine code, both static and JIT.
//!
//! This module is responsible for taking our intermediate representation from
//! [`crate::ir`] and turning it into Cranelift and then into object code that
//! can either be saved to disk or run in memory. Because the runtime functions
//! for NGR are very closely tied to the compiler implementation, we also include
//! information about these functions as part of the module.
//!
//! ## Using the `Backend`
//!
//! The backend of this compiler can be used in two modes: a static compilation
//! mode, where the goal is to write the compiled object to disk and then link
//! it later, and a JIT mode, where the goal is to write the compiled object to
//! memory and then run it. Both modes use the same `Backend` object, because
//! they share a lot of behaviors. However, you'll want to use different variants
//! based on your goals:
//!
//! * Use `Backend<ObjectModule>`, constructed via [`Backend::object_file`],
//! if you want to compile to an object file on disk, which you're then going
//! to link to later.
//! * Use `Backend<JITModule>`, constructed via [`Backend::jit`], if you want
//! to do just-in-time compilation and are just going to run things immediately.
//!
//! ## Working with Runtime Functions
//!
//! For now, runtime functions are pretty easy to describe, because there's
//! only one. In the future, though, the [`RuntimeFunctions`] object is there to
//! help provide a clean interface to them all.
mod error;
mod eval;
mod into_crane;
@@ -16,6 +44,15 @@ use target_lexicon::Triple;
const EMPTY_DATUM: [u8; 8] = [0; 8];
/// An object representing an active backend.
///
/// Internally, this object holds a bunch of state useful for compiling one
/// or more functions into an object file or memory. It can be passed around,
/// but cannot currently be duplicated because some of that state is not
/// easily duplicated. You should be able to share this across threads, assuming
/// normal Rust safety, but you should be thoughtful about transferring it across
/// processes in a JIT context due to some special cases in the runtime function
/// implementations.
pub struct Backend<M: Module> {
pub module: M,
data_ctx: DataContext,
@@ -26,6 +63,12 @@ pub struct Backend<M: Module> {
}
impl Backend<JITModule> {
/// Create a new JIT backend for compiling NGR into memory.
///
/// The provided output buffer is not for the compiled code, but for the output
/// of any `print` expressions that are evaluated. If set to `None`, the output
/// will be written to `stdout` as per normal, but if a String buffer is provided,
/// it will be extended by any `print` statements that happen during code execution.
pub fn jit(output_buffer: Option<String>) -> Result<Self, BackendError> {
let platform = Triple::host();
let isa_builder = isa::lookup(platform.clone())?;
@@ -50,12 +93,24 @@ impl Backend<JITModule> {
})
}
/// Given a compiled function ID, get a pointer to where that function was written
/// in memory.
///
/// The data at this pointer should not be mutated unless you really, really,
/// really know what you're doing. It can be run by casting it into a Rust
/// `fn() -> ()`, and then calling it from normal Rust.
pub fn bytes(&self, function_id: FuncId) -> *const u8 {
self.module.get_finalized_function(function_id)
}
}
impl Backend<ObjectModule> {
/// Generate a backend for compiling into an object file for the given target.
///
/// This backend will generate a single output file per `Backend` object, although
/// that file may have multiple functions defined within it. Data between those
/// functions (in particular, strings) will be defined once and shared between
/// the different functions.
pub fn object_file(platform: Triple) -> Result<Self, BackendError> {
let isa_builder = isa::lookup(platform.clone())?;
let mut settings_builder = settings::builder();
@@ -76,12 +131,22 @@ impl Backend<ObjectModule> {
})
}
/// Given all the functions defined, return the bytes the object file should contain.
pub fn bytes(self) -> Result<Vec<u8>, BackendError> {
self.module.finish().emit().map_err(Into::into)
}
}
impl<M: Module> Backend<M> {
/// Define a string within the current backend.
///
/// Note that this is a Cranelift [`DataId`], which then must be redeclared inside the
/// context of any functions or data items that want to use it. That being said, the
/// string value will be defined once in the file and then shared by all referencers.
///
/// This function will automatically add a null character (`'\0'`) to the end of the
//! string, to ensure that strings are null-terminated for interactions with other
/// languages.
pub fn define_string(&mut self, s: &str) -> Result<DataId, BackendError> {
let name = format!("<string_constant>{}", s);
let s0 = format!("{}\0", s);
@@ -97,6 +162,11 @@ impl<M: Module> Backend<M> {
Ok(global_id)
}
/// Define a global variable within the current backend.
///
/// These variables can be shared between functions, and will be exported from the
/// module itself as public data in the case of static compilation. Their initial
/// value will be null.
pub fn define_variable(&mut self, name: String) -> Result<DataId, BackendError> {
self.data_ctx.define(Box::new(EMPTY_DATUM));
let id = self
@@ -108,6 +178,11 @@ impl<M: Module> Backend<M> {
Ok(id)
}
/// Get a pointer to the output buffer for `print`ing, or `null`.
///
/// As suggested, returns `null` in the case where the user has not provided an
/// output buffer; it is your responsibility to check for this case and do
/// something sensible.
pub fn output_buffer_ptr(&mut self) -> *mut String {
if let Some(str) = self.output_buffer.as_mut() {
str as *mut String
@@ -116,6 +191,10 @@ impl<M: Module> Backend<M> {
}
}
/// Get any captured output `print`ed by the program during execution.
///
/// If an output buffer was not provided, or if the program has not done any
/// printing, then this function will return an empty string.
pub fn output(self) -> String {
if let Some(s) = self.output_buffer {
s

View File

@@ -2,8 +2,27 @@ use crate::backend::runtime::RuntimeFunctionError;
use codespan_reporting::diagnostic::Diagnostic;
use cranelift_codegen::{isa::LookupError, settings::SetError, CodegenError};
use cranelift_module::ModuleError;
use internment::ArcIntern;
use thiserror::Error;
/// An error in the translation to a backend (either the JIT or the static compiler).
///
/// In general, this is just a nice summary error type for a bunch of downstream
/// errors; the exception are internal errors from builtin functions or variable
/// lookups.
///
/// Unlike some other errors in the system, the translation to a `Diagnostic` does
/// not necessarily provide a whole lot of value, because we have lost most of the
/// source information by the time we're generating these errors. That being said,
/// people who want to provide nicer error messages might consider using the
/// translation through `Diagnostic` anyways, just in case we add more information
/// in the future.
///
/// Finally, the `PartialEq` for this type is a bit fuzzy. In some cases, it
/// ensures that the errors match exactly. In other cases, though, it just checks to
/// see if the two errors are of the same class; e.g., it will return true if both
/// errors are `BackendError::CodegenError`, regardless of what the specific
/// `CodegenError` is.
#[derive(Debug, Error)]
pub enum BackendError {
#[error("Cranelift module error: {0}")]
@@ -11,7 +30,7 @@ pub enum BackendError {
#[error("Builtin function error: {0}")]
BuiltinError(#[from] RuntimeFunctionError),
#[error("Internal variable lookup error")]
VariableLookupFailure,
VariableLookupFailure(ArcIntern<String>),
#[error(transparent)]
CodegenError(#[from] CodegenError),
#[error(transparent)]
@@ -31,9 +50,8 @@ impl From<BackendError> for Diagnostic<usize> {
BackendError::BuiltinError(me) => {
Diagnostic::error().with_message(format!("Internal runtime function error: {}", me))
}
BackendError::VariableLookupFailure => {
Diagnostic::error().with_message("Internal variable lookup error!")
}
BackendError::VariableLookupFailure(x) => Diagnostic::error()
.with_message(format!("Internal variable lookup error for {}", x)),
BackendError::CodegenError(me) => {
Diagnostic::error().with_message(format!("Internal codegen error: {}", me))
}
@@ -58,8 +76,12 @@ impl PartialEq for BackendError {
_ => false,
},
// because the underlying `CodegenError` doesn't implement `PartialEq`,
// we just check that they're both `CodegenError`s.
BackendError::CodegenError(_) => matches!(other, BackendError::CodegenError(_)),
// because the underlying `ModuleError` doesn't implement `PartialEq`,
// we just check that they're both `Cranelift`s.
BackendError::Cranelift(_) => matches!(other, BackendError::Cranelift(_)),
BackendError::LookupError(a) => match other {
@@ -72,7 +94,10 @@ impl PartialEq for BackendError {
_ => false,
},
BackendError::VariableLookupFailure => other == &BackendError::VariableLookupFailure,
BackendError::VariableLookupFailure(a) => match other {
BackendError::VariableLookupFailure(b) => a == b,
_ => false,
},
BackendError::Write(a) => match other {
BackendError::Write(b) => a == b,

View File

@@ -8,6 +8,19 @@ use cranelift_object::ObjectModule;
use target_lexicon::Triple;
impl Backend<JITModule> {
/// Evaluate the given IR, returning the output it prints.
///
/// This builds and executes the program using the JIT backend, using a fresh JIT runtime
/// that should be independent of any other runtimes being executed. As such, it should be
/// impossible for a program being executed by this function to interact with another, parallel
/// execution of the function. If you actually want them to interact, you'll need to combine
/// them into the same `Program` before execution.
///
/// One important note: The runtime used by this function does not currently implement
/// overflow/underflow erroring the same way that other evaluation functions within this
/// library do. So, if you're validating equivalence between them, you'll want to weed
/// out examples that overflow/underflow before checking equivalence. (This is the behavior
/// of the built-in test systems.)
pub fn eval(program: Program) -> Result<String, EvalError> {
let mut jitter = Backend::jit(Some(String::new()))?;
let function_id = jitter.compile_function("test", program)?;
@@ -20,6 +33,20 @@ impl Backend<JITModule> {
}
impl Backend<ObjectModule> {
/// Evaluate the given IR, returning the output it prints.
///
/// This builds the program as a standalone object in a temporary directory, and then links
/// and runs it using the provided runtime system (see `CARGO_MANIFEST_DIR/runtime/`). To
/// do so, it assumes that there is a version of `clang` available in the current PATH.
///
/// This routine is regularly tested under Windows, Mac, and Linux, and should work across
/// other platforms that support `clang`.
///
/// One important note: The runtime used by this function does not currently implement
/// overflow/underflow erroring the same way that other evaluation functions within this
/// library do. So, if you're validating equivalence between them, you'll want to weed
/// out examples that overflow/underflow before checking equivalence. (This is the behavior
/// of the built-in test systems.)
pub fn eval(program: Program) -> Result<String, EvalError> {
//use pretty::{Arena, Pretty};
//let allocator = Arena::<()>::new();
@@ -40,18 +67,26 @@ impl Backend<ObjectModule> {
if output.status.success() {
Ok(std::string::String::from_utf8_lossy(&output.stdout).to_string())
} else {
Err(EvalError::IO(format!(
"Exitted with error code {}",
output.status
)))
Err(EvalError::ExitCode(output.status))
}
} else {
Err(EvalError::IO(
Err(EvalError::RuntimeOutput(
std::string::String::from_utf8_lossy(&output.stderr).to_string(),
))
}
}
/// Link the generated object into an executable.
///
/// Currently, our runtime system is a single file, and ends up being the file
/// that includes `main`. (It then calls the `gogogo` function which serves as the
/// entry point for our compiled code.) This function thus just uses `clang` to
/// compile the C file with the generated object file to produce the executable.
/// Conveniently, `clang` also sets execute permissions under unix-like file systems.
///
/// This function assumes that this compilation and linking should run without any
/// output, so changes to the RTS should make 100% sure that they do not generate
/// any compiler warnings.
fn link(object_file: &Path, executable_path: &Path) -> Result<(), EvalError> {
use std::path::PathBuf;
@@ -67,7 +102,7 @@ impl Backend<ObjectModule> {
.output()?;
if !output.stderr.is_empty() {
return Err(EvalError::IO(
return Err(EvalError::Linker(
std::string::String::from_utf8_lossy(&output.stderr).to_string(),
));
}
@@ -77,12 +112,17 @@ impl Backend<ObjectModule> {
}
proptest::proptest! {
// This is the obvious test to make sure that our static compilation path works
// without error, assuming any possible input ... well, any possible input that
// doesn't involve overflow or underflow.
#[test]
fn file_backend_works(program: Program) {
fn static_backend(program: Program) {
use crate::eval::PrimOpError;
let basic_result = program.eval();
// windows `printf` is going to terminate lines with "\r\n", so we need to adjust
// our test result here.
#[cfg(target_family="windows")]
let basic_result = basic_result.map(|x| x.replace('\n', "\r\n"));
@@ -92,8 +132,11 @@ proptest::proptest! {
}
}
// This is the obvious test to make sure that our JIT compilation path works
// without error, assuming any possible input ... well, any possible input that
// doesn't involve overflow or underflow.
#[test]
fn jit_backend_works(program: Program) {
fn jit_backend(program: Program) {
use crate::eval::PrimOpError;
let basic_result = program.eval();

View File

@@ -8,15 +8,31 @@ use cranelift_codegen::ir::{
use cranelift_codegen::isa::CallConv;
use cranelift_codegen::Context;
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
use cranelift_module::{FuncId, Linkage, Module, ModuleError};
use cranelift_module::{FuncId, Linkage, Module};
use internment::ArcIntern;
use crate::backend::error::BackendError;
use crate::backend::Backend;
/// When we're compiling, we might need to reference some of the strings built into
/// the source code; to do so, we need a `GlobalValue`. Perhaps unexpectedly, given
/// the name, `GlobalValue`s are specific to a single function we're compiling, so
/// we end up computing this table for every function.
///
/// This is just a handy type alias to avoid a lot of confusion in the functions.
type StringTable = HashMap<ArcIntern<String>, GlobalValue>;
impl<M: Module> Backend<M> {
/// Compile the given `Program` into a function with the given name.
///
/// At some point, the use of `Program` is going to change; however, for the
/// moment, we have no notion of a function in our language so the whole input
/// is converted into a single output function. The type of the generated
/// function is, essentially, `fn() -> ()`: it takes no arguments and returns
/// no value.
///
/// The function provided can then be either written to a file (if using a
/// static Cranelift backend) or executed directly (if using the Cranelift JIT).
pub fn compile_function(
&mut self,
function_name: &str,
@@ -28,21 +44,47 @@ impl<M: Module> Backend<M> {
call_conv: CallConv::SystemV,
};
// this generates the handle for the function that we'll eventually want to
// return to the user. For now, we declare all functions defined by this
// function as public/global/exported, although we may want to reconsider
// this decision later.
let func_id =
self.module
.declare_function(function_name, Linkage::Export, &basic_signature)?;
let mut ctx = Context::new();
ctx.func =
Function::with_name_signature(UserFuncName::user(0, func_id.as_u32()), basic_signature);
// Next we have to generate the compilation context for the rest of this
// function. Currently, we generate a fresh context for every function.
// Since we're only generating one function per `Program`, this makes
// complete sense. However, in the future, we may want to revisit this
// decision.
let mut ctx = Context::new();
let user_func_name = UserFuncName::user(0, func_id.as_u32());
ctx.func = Function::with_name_signature(user_func_name, basic_signature);
// We generate a table of every string that we use in the program, here.
// Cranelift is going to require us to have this in a particular structure
// (`GlobalValue`) so that we can reference them later, and it's going to
// be tricky to generate those on the fly. So we just generate the set we
// need here, and then have it around in the table for later.
let string_table = self.build_string_table(&mut ctx.func, &program)?;
let mut variable_table = HashMap::new();
let mut next_var_num = 1;
// In the future, we might want to see what runtime functions the function
// we were given uses, and then only include those functions that we care
// about. Presumably, we'd use some sort of lookup table like we do for
// strings. But for now, we only have one runtime function, and we're pretty
// sure we're always going to use it, so we just declare it (and reference
// it) directly.
let print_func_ref = self.runtime_functions.include_runtime_function(
"print",
&mut self.module,
&mut ctx.func,
)?;
// In the case of the JIT, there may be symbols we've already defined outside
// the context of this particular `Program`, which we might want to reference.
// Just like with strings, generating the `GlobalValue`s we need can potentially
// be a little tricky to do on the fly, so we generate the complete list right
// here and then use it later.
let pre_defined_symbols: HashMap<String, GlobalValue> = self
.defined_symbols
.iter()
@@ -52,67 +94,88 @@ impl<M: Module> Backend<M> {
})
.collect();
// The last table we're going to need is our local variable table, to store
// variables used in this `Program` but not used outside of it. For whatever
// reason, Cranelift requires us to generate unique indexes for each of our
// variables; we just use a simple incrementing counter for that.
let mut variable_table = HashMap::new();
let mut next_var_num = 1;
// Finally (!), we generate the function builder that we're going to use to
// make this function!
let mut fctx = FunctionBuilderContext::new();
let mut builder = FunctionBuilder::new(&mut ctx.func, &mut fctx);
// Make the initial block to put instructions in. Later, when we have control
// flow, we might add more blocks after this one. But, for now, we only have
// the one block.
let main_block = builder.create_block();
builder.switch_to_block(main_block);
// Compiling a function is just compiling each of the statements in order.
// At the moment, we do the pattern match for statements here, and then
// directly compile the statements. If/when we add more statement forms,
// this is likely to become more cumbersome, and we'll want to separate
// these off. But for now, given the amount of tables we keep around to track
// state, it's easier to just include them.
for stmt in program.statements.drain(..) {
match stmt {
// Print statements are fairly easy to compile: we just lookup the
// output buffer, the address of the string to print, and the value
// of whatever variable we're printing. Then we just call print.
Statement::Print(ann, var) => {
// Get the output buffer (or null) from our general compilation context.
let buffer_ptr = self.output_buffer_ptr();
let buffer_ptr = builder.ins().iconst(types::I64, buffer_ptr as i64);
// Get a reference to the string we want to print.
let local_name_ref = string_table.get(&var).unwrap();
let name_ptr = builder.ins().symbol_value(types::I64, *local_name_ref);
let val = ValueOrRef::Ref(ann, var).into_cranelift(
// Look up the value for the variable. Because this might be a
// global variable (and that requires special logic), we just turn
// this into an `Expression` and re-use the logic in that implementation.
let val = Expression::Reference(ann, var).into_crane(
&mut builder,
&variable_table,
&pre_defined_symbols,
)?;
// Finally, we can generate the call to print.
builder
.ins()
.call(print_func_ref, &[buffer_ptr, name_ptr, val]);
}
// Variable binding is a little more complex.
Statement::Binding(_, var_name, value) => {
let val = match value {
Expression::Value(_, Value::Number(_, v)) => {
builder.ins().iconst(types::I64, v)
}
Expression::Reference(_, name) => {
let value_var_num = variable_table.get(&name).unwrap();
builder.use_var(Variable::new(*value_var_num))
}
Expression::Primitive(_, prim, mut vals) => {
let right = vals.pop().unwrap().into_cranelift(
&mut builder,
&variable_table,
&pre_defined_symbols,
)?;
let left = vals.pop().unwrap().into_cranelift(
&mut builder,
&variable_table,
&pre_defined_symbols,
)?;
match prim {
Primitive::Plus => builder.ins().iadd(left, right),
Primitive::Minus => builder.ins().isub(left, right),
Primitive::Times => builder.ins().imul(left, right),
Primitive::Divide => builder.ins().sdiv(left, right),
}
}
};
// Kick off to the `Expression` implementation to see what value we're going
// to bind to this variable.
let val =
value.into_crane(&mut builder, &variable_table, &pre_defined_symbols)?;
// Now the question is: is this a local variable, or a global one?
if let Some(global_id) = pre_defined_symbols.get(var_name.as_str()) {
// It's a global variable! In this case, we assume that someone has already
// dedicated some space in memory to store this value. We look this location
// up, and then tell Cranelift to store the value there.
let val_ptr = builder.ins().symbol_value(types::I64, *global_id);
builder.ins().store(MemFlags::new(), val, val_ptr, 0);
} else {
// It's a local variable! In this case, we need to allocate a new Cranelift
// `Variable` for this variable, which we do using our `next_var_num` counter.
// (While we're doing this, we also increment `next_var_num`, so that we get
// a fresh `Variable` next time. This is one of those very narrow cases in which
// I wish Rust had an increment expression.)
let var = Variable::new(next_var_num);
variable_table.insert(var_name, next_var_num);
next_var_num += 1;
// We can add the variable directly to our local variable map; it's `Copy`.
variable_table.insert(var_name, var);
// Now we tell Cranelift about our new variable, which has type I64 because
// everything we have at this point is of type I64. Once it's declared, we
// define it as having the value we computed above.
builder.declare_var(var, types::I64);
builder.def_var(var, val);
}
@@ -120,15 +183,30 @@ impl<M: Module> Backend<M> {
}
}
// Now that we're done, inject a return instruction (one with no actual value; basically
// the equivalent of Rust's `return;`). We then seal the block (which lets Cranelift
// know that the block is done), and then finalize the function (which lets Cranelift
// know we're done with the function).
builder.ins().return_(&[]);
builder.seal_block(main_block);
builder.finalize();
// This is a little odd. We want to tell the rest of Cranelift about this function,
// so we register it using the function ID and our builder context. However, the
// result of this function isn't actually super helpful. So we ignore it, unless
// it's an error.
let _ = self.module.define_function(func_id, &mut ctx)?;
// done!
Ok(func_id)
}
// Build the string table for use in referencing strings later.
//
// This function is slightly smart, in that it only puts strings in the table that
// are used by the `Program`. (Thanks to `Program::strings()`!) If the strings have
// been declared globally, via `Backend::define_string()`, we will re-use that data.
// Otherwise, this will define the string for you.
fn build_string_table(
&mut self,
func: &mut Function,
@@ -149,30 +227,73 @@ impl<M: Module> Backend<M> {
}
}
impl ValueOrRef {
fn into_cranelift(
impl Expression {
fn into_crane(
self,
builder: &mut FunctionBuilder,
local_variables: &HashMap<ArcIntern<String>, usize>,
local_variables: &HashMap<ArcIntern<String>, Variable>,
global_variables: &HashMap<String, GlobalValue>,
) -> Result<entities::Value, ModuleError> {
) -> Result<entities::Value, BackendError> {
match self {
ValueOrRef::Value(_, value) => match value {
Value::Number(_base, numval) => Ok(builder.ins().iconst(types::I64, numval)),
},
// Values are pretty straightforward to compile, mostly because we only
// have one type of variable, and it's an integer type.
Expression::Value(_, Value::Number(_, v)) => Ok(builder.ins().iconst(types::I64, v)),
ValueOrRef::Ref(_, name) => {
if let Some(local_num) = local_variables.get(&name) {
return Ok(builder.use_var(Variable::new(*local_num)));
Expression::Reference(_, name) => {
// first we see if this is a local variable (which is nicer, from an
// optimization point of view.)
if let Some(local_var) = local_variables.get(&name) {
return Ok(builder.use_var(*local_var));
}
if let Some(global_id) = global_variables.get(name.as_str()) {
let val_ptr = builder.ins().symbol_value(types::I64, *global_id);
// then we check to see if this is a global reference, which requires us to
// first lookup where the value is stored, and then load it.
if let Some(global_var) = global_variables.get(name.as_ref()) {
let val_ptr = builder.ins().symbol_value(types::I64, *global_var);
return Ok(builder.ins().load(types::I64, MemFlags::new(), val_ptr, 0));
}
Err(ModuleError::Undeclared(name.to_string()))
// this should never happen, because we should have made sure that there are
// no unbound variables a long time before this. but still ...
Err(BackendError::VariableLookupFailure(name))
}
Expression::Primitive(_, prim, mut vals) => {
// we're going to use `pop`, so we're going to pull and compile the right value ...
let right =
vals.pop()
.unwrap()
.into_crane(builder, local_variables, global_variables)?;
// ... and then the left.
let left =
vals.pop()
.unwrap()
.into_crane(builder, local_variables, global_variables)?;
// then we just need to tell Cranelift how to do each of our primitives! Much
// like Statements, above, we probably want to eventually shuffle this off into
// a separate function (maybe something off `Primitive`), but for now it's simple
// enough that we just do the `match` here.
match prim {
Primitive::Plus => Ok(builder.ins().iadd(left, right)),
Primitive::Minus => Ok(builder.ins().isub(left, right)),
Primitive::Times => Ok(builder.ins().imul(left, right)),
Primitive::Divide => Ok(builder.ins().sdiv(left, right)),
}
}
}
}
}
// Just to avoid duplication, this leverages the `From<ValueOrRef>` implementation
// on `Expression` to compile this via the `Expression` logic, above.
impl ValueOrRef {
/// Compile this value-or-reference into a Cranelift SSA value.
///
/// This is a thin wrapper: it converts `self` into an `Expression` (via the
/// `From<ValueOrRef>` conversion) and delegates to `Expression::into_crane`,
/// so value and variable-reference handling is identical to full expressions.
fn into_crane(
self,
builder: &mut FunctionBuilder,
local_variables: &HashMap<ArcIntern<String>, Variable>,
global_variables: &HashMap<String, GlobalValue>,
) -> Result<entities::Value, BackendError> {
Expression::from(self).into_crane(builder, local_variables, global_variables)
}
}

View File

@@ -8,9 +8,14 @@ use std::fmt::Write;
use target_lexicon::Triple;
use thiserror::Error;
/// An object for querying / using functions built into the runtime.
///
/// Right now, this is quite a bit of boilerplate for very nebulous
/// value. However, as the number of built-in functions gets large, it's
/// nice to have a single point to register and query them, so here we
/// go.
pub struct RuntimeFunctions {
builtin_functions: HashMap<String, FuncId>,
_referenced_functions: Vec<String>,
}
#[derive(Debug, Error, PartialEq)]
@@ -19,25 +24,27 @@ pub enum RuntimeFunctionError {
CannotFindRuntimeFunction(String),
}
extern "C" fn runtime_print(output_buffer: *mut String, name: *const i8, value: i64) {
let cstr = unsafe { CStr::from_ptr(name) };
let reconstituted = cstr.to_string_lossy();
if let Some(output_buffer) = unsafe { output_buffer.as_mut() } {
writeln!(output_buffer, "{} = {}i64", reconstituted, value).unwrap();
} else {
println!("{} = {}", reconstituted, value);
}
}
impl RuntimeFunctions {
/// Generate a new runtime function table for the given platform, and
/// declare them within the provided Cranelift module.
///
/// Note that this is very conservative: it assumes that your module
/// will want to use every runtime function. Unless the Cranelift object
/// builder is smart, this might inject a bunch of references (and thus
/// linker requirements) that aren't actually needed by your program.
///
/// Then again, right now there's exactly one runtime function, so ...
/// not a big deal.
pub fn new<M: Module>(platform: &Triple, module: &mut M) -> ModuleResult<RuntimeFunctions> {
let mut builtin_functions = HashMap::new();
let _referenced_functions = Vec::new();
let string_param = AbiParam::new(types::I64);
let int64_param = AbiParam::new(types::I64);
// declare print for Cranelift; it's something we're going to import
// into the current module (it's compiled separately), and takes two
// strings and an integer. (Which ... turn out to all be the same
// underlying type, which is weird but the way it is.)
let print_id = module.declare_function(
"print",
Linkage::Import,
@@ -47,14 +54,19 @@ impl RuntimeFunctions {
call_conv: CallConv::triple_default(platform),
},
)?;
// Toss this function in our internal dictionary, as well.
builtin_functions.insert("print".to_string(), print_id);
Ok(RuntimeFunctions {
builtin_functions,
_referenced_functions,
})
Ok(RuntimeFunctions { builtin_functions })
}
/// Include the named runtime function into the current Function context.
///
/// This is necessary for every runtime function reference within each
/// function. The returned `FuncRef` can be used in `call` invocations.
/// The only reason for this function to error is if you pass a name that
/// the runtime isn't familiar with.
pub fn include_runtime_function<M: Module>(
&self,
name: &str,
@@ -69,7 +81,30 @@ impl RuntimeFunctions {
}
}
/// Register live, local versions of the runtime functions into the JIT.
///
/// Note that these implementations are *not* the same as the ones defined
/// in `CARGO_MANIFEST_DIR/runtime/`, for ... reasons. It might be a good
/// change, in the future, to find a way to unify these implementations into
/// one; both to reduce the chance that they deviate, and to reduce overall
/// maintenance burden.
pub fn register_jit_implementations(builder: &mut JITBuilder) {
builder.symbol("print", runtime_print as *const u8);
}
}
// Print! This implementation is used in the JIT compiler, to actually print data. We
// use the `output_buffer` argument as an aid for testing; if it's non-NULL, it's a string
// we extend with the output, so that multiple JIT'd `Program`s can run concurrently
// without stomping over each other's output. If `output_buffer` is NULL, we just print
// to stdout.
extern "C" fn runtime_print(output_buffer: *mut String, name: *const i8, value: i64) {
// SAFETY: `name` is assumed to be a valid, NUL-terminated C string; the strings the
// compiled code passes here come from `Backend::define_string`, which appends a '\0'.
let cstr = unsafe { CStr::from_ptr(name) };
// Lossy conversion avoids panicking if the name somehow contains non-UTF-8 bytes.
let reconstituted = cstr.to_string_lossy();
// SAFETY: `output_buffer` is either null or the pointer handed out by
// `Backend::output_buffer_ptr`, which points at a `String` owned by the live
// `Backend`; `as_mut` maps the null case to `None`.
if let Some(output_buffer) = unsafe { output_buffer.as_mut() } {
// NOTE(review): the buffered form appends an `i64` suffix to the value while the
// stdout form below does not -- confirm whether this divergence is intentional.
writeln!(output_buffer, "{} = {}i64", reconstituted, value).unwrap();
} else {
println!("{} = {}", reconstituted, value);
}
}

View File

@@ -1,17 +1,7 @@
use clap::Parser;
use codespan_reporting::diagnostic::Diagnostic;
use codespan_reporting::files::SimpleFiles;
use codespan_reporting::term;
use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};
use cranelift_object::object;
use ngr::backend::Backend;
use ngr::backend::BackendError;
use ngr::ir::Program as IR;
use ngr::syntax::{ParserError, Program as Syntax};
use target_lexicon::Triple;
use thiserror::Error;
/// Clap is great! Even though we don't have many command line arguments
/// yet, this is just really neat.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct CommandLineArguments {
@@ -23,76 +13,14 @@ struct CommandLineArguments {
file: String,
}
/// The top-level error type for the compiler driver.
///
/// Each variant wraps the failure from one stage of the pipeline so that
/// `main` can report any of them uniformly as a diagnostic.
#[derive(Debug, Error)]
enum MainError {
    /// Code generation in the Cranelift backend failed.
    #[error(transparent)]
    Backend(#[from] BackendError),
    /// The source file could not be parsed.
    #[error("Parser error")]
    ParserError(#[from] ParserError),
    /// Reading the input or writing the output failed.
    #[error("IO error")]
    IoError(#[from] std::io::Error),
    /// Cranelift's object writer rejected the module.
    #[error("write error")]
    WriteError(#[from] object::write::Error),
}
impl From<MainError> for Diagnostic<usize> {
fn from(value: MainError) -> Self {
match value {
MainError::Backend(be) => be.into(),
MainError::ParserError(pe) => (&pe).into(),
MainError::IoError(e) => Diagnostic::error().with_message(format!("IO error: {}", e)),
MainError::WriteError(e) => {
Diagnostic::error().with_message(format!("Module write error: {}", e))
}
}
}
}
/// Run the whole pipeline for the file named on the command line: parse,
/// validate, lower to IR, compile, and write the object file to disk.
///
/// Validation failures are reported to stderr and yield `Ok(())` — they've
/// already been shown to the user. Only infrastructure failures (parse, IO,
/// backend) surface as `Err`.
fn compile(file_database: &mut SimpleFiles<String, String>) -> Result<(), MainError> {
    let args = CommandLineArguments::parse();
    let syntax = Syntax::parse_file(file_database, &args.file)?;
    // Validation gives us errors (fatal) and warnings (advisory); we show
    // both, but only errors stop the build.
    let (errors, warnings) = syntax.validate();
    let had_errors = !errors.is_empty();
    let writer = StandardStream::stderr(ColorChoice::Auto);
    let config = codespan_reporting::term::Config::default();
    let messages = errors
        .into_iter()
        .map(Into::into)
        .chain(warnings.into_iter().map(Into::into));
    for message in messages {
        term::emit(&mut writer.lock(), &config, file_database, &message).unwrap();
    }
    if had_errors {
        return Ok(());
    }
    // Lower to IR and hand everything to Cranelift for the host target.
    let ir = IR::from(syntax.simplify());
    let mut backend = Backend::object_file(Triple::host())?;
    backend.compile_function("gogogo", ir)?;
    let bytes = backend.bytes()?;
    let output_path = args.output.unwrap_or_else(|| "output.o".to_string());
    std::fs::write(output_path, bytes)?;
    Ok(())
}
fn main() {
let mut file_database = SimpleFiles::new();
let args = CommandLineArguments::parse();
let mut compiler = ngr::Compiler::default();
match compile(&mut file_database) {
Ok(()) => {}
Err(e) => {
let writer = StandardStream::stderr(ColorChoice::Auto);
let config = codespan_reporting::term::Config::default();
let output_file = args.output.unwrap_or("output.o".to_string());
term::emit(
&mut writer.lock(),
&config,
&file_database,
&Diagnostic::from(e),
)
.unwrap();
}
if let Some(bytes) = compiler.compile(&args.file) {
std::fs::write(&output_file, bytes)
.unwrap_or_else(|x| eprintln!("Could not write to file {}: {}", output_file, x));
}
}

View File

@@ -1,130 +1,11 @@
use codespan_reporting::diagnostic::Diagnostic;
use codespan_reporting::files::SimpleFiles;
use codespan_reporting::term::{self, Config};
use cranelift_jit::JITModule;
use cranelift_module::ModuleError;
use ngr::backend::{Backend, BackendError};
use ngr::ir::Program as IR;
use ngr::syntax::{Location, ParserError, Statement};
use pretty::termcolor::{ColorChoice, StandardStream, WriteColor};
use ngr::backend::BackendError;
use rustyline::error::ReadlineError;
use rustyline::DefaultEditor;
use std::collections::HashMap;
/// All of the state for one interactive REPL session.
pub struct RunLoop<'a> {
    // Every line the user enters is registered here as its own "file", so
    // diagnostics can point back at the exact input span.
    file_database: SimpleFiles<&'a str, String>,
    // The JIT backend that compiles and runs each entered statement.
    jitter: Backend<JITModule>,
    // Where each variable was bound; handed to `validate` on every statement.
    variable_binding_sites: HashMap<String, Location>,
    // Monotonic counter threaded through `simplify`; presumably used to
    // generate fresh temporary names — confirm in the simplifier.
    gensym_index: usize,
    // Sink for diagnostics (usually a standard stream).
    writer: &'a mut dyn WriteColor,
    // codespan-reporting rendering configuration.
    config: Config,
}
/// Everything that can go wrong while processing a single REPL line.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, thiserror::Error)]
enum REPLError {
    /// The entered statement failed to parse.
    #[error("Error parsing statement: {0}")]
    Parser(#[from] ParserError),
    /// The backend failed while JIT-compiling the statement.
    #[error("JIT error: {0}")]
    JIT(#[from] BackendError),
    /// A low-level Cranelift module error (e.g. during finalization).
    #[error("Internal cranelift error: {0}")]
    Cranelift(#[from] ModuleError),
    /// codespan-reporting itself failed while emitting a diagnostic.
    #[error(transparent)]
    Reporting(#[from] codespan_reporting::files::Error),
}
impl From<REPLError> for Diagnostic<usize> {
    /// Turn a REPL error into something codespan-reporting can display.
    ///
    /// Parser and JIT errors carry their own diagnostic conversions; the
    /// remaining cases are internal problems, so they render as bugs.
    fn from(value: REPLError) -> Self {
        match value {
            REPLError::Parser(parser_error) => Diagnostic::from(&parser_error),
            REPLError::JIT(jit_error) => Diagnostic::from(jit_error),
            REPLError::Cranelift(cranelift_error) => {
                Diagnostic::bug().with_message(format!("{}", cranelift_error))
            }
            REPLError::Reporting(reporting_error) => {
                Diagnostic::bug().with_message(format!("{}", reporting_error))
            }
        }
    }
}
impl<'a> RunLoop<'a> {
    /// Build a fresh REPL session that writes diagnostics to `writer`.
    ///
    /// The only failure mode is the JIT backend failing to construct.
    pub fn new(writer: &'a mut dyn WriteColor, config: Config) -> Result<Self, BackendError> {
        Ok(RunLoop {
            file_database: SimpleFiles::new(),
            jitter: Backend::jit(None)?,
            variable_binding_sites: HashMap::new(),
            gensym_index: 1,
            writer,
            config,
        })
    }

    /// Render one diagnostic through this session's writer and config.
    fn emit_diagnostic(
        &mut self,
        diagnostic: Diagnostic<usize>,
    ) -> Result<(), codespan_reporting::files::Error> {
        term::emit(self.writer, &self.config, &self.file_database, &diagnostic)
    }

    /// Process one line of user input, reporting (rather than returning)
    /// any error as a diagnostic.
    fn process_input(&mut self, line_no: usize, command: String) {
        if let Err(err) = self.process(line_no, command) {
            if let Err(e) = self.emit_diagnostic(Diagnostic::from(err)) {
                eprintln!(
                    "WOAH! System having trouble printing error messages. This is very bad. ({})",
                    e
                );
            }
        }
    }

    /// Parse, validate, JIT-compile, and run a single statement.
    fn process(&mut self, line_no: usize, command: String) -> Result<(), REPLError> {
        // Each input line becomes its own "file" so diagnostics can cite
        // spans within it.
        let entry = self.file_database.add("entry", command);
        let source = self
            .file_database
            .get(entry)
            .expect("entry exists")
            .source();
        let syntax = Statement::parse(entry, source)?;
        // if this is a variable binding, and we've never defined this variable before,
        // we should tell cranelift about it. this is optimistic; if we fail to compile,
        // then we won't use this definition until someone tries again.
        if let Statement::Binding(_, ref name, _) = syntax {
            if !self.variable_binding_sites.contains_key(name.as_str()) {
                self.jitter.define_string(name)?;
                self.jitter.define_variable(name.clone())?;
            }
        };
        // Validation may produce warnings (advisory) and errors (fatal);
        // we emit both, but only errors stop us from running the line.
        let (mut errors, mut warnings) = syntax.validate(&mut self.variable_binding_sites);
        let stop = !errors.is_empty();
        let messages = errors
            .drain(..)
            .map(Into::into)
            .chain(warnings.drain(..).map(Into::into));
        for message in messages {
            self.emit_diagnostic(message)?;
        }
        if stop {
            return Ok(());
        }
        // Lower to IR and compile under a unique per-line function name.
        let ir = IR::from(syntax.simplify(&mut self.gensym_index));
        let name = format!("line{}", line_no);
        let function_id = self.jitter.compile_function(&name, ir)?;
        self.jitter.module.finalize_definitions()?;
        let compiled_bytes = self.jitter.bytes(function_id);
        // NOTE(review): this transmute assumes the JIT'd entry point takes no
        // arguments and returns nothing — confirm against the backend's ABI.
        let compiled_function = unsafe { std::mem::transmute::<_, fn() -> ()>(compiled_bytes) };
        compiled_function();
        Ok(())
    }
}
fn main() -> Result<(), BackendError> {
let mut editor = DefaultEditor::new().expect("rustyline works");
let mut line_no = 0;
let mut writer = StandardStream::stdout(ColorChoice::Auto);
let config = codespan_reporting::term::Config::default();
let mut state = RunLoop::new(&mut writer, config)?;
let mut state = ngr::REPL::default();
println!("No Good Reason, the Interpreter!");
loop {
@@ -135,18 +16,30 @@ fn main() -> Result<(), BackendError> {
":quit" => break,
_ => state.process_input(line_no, command),
},
// it's not clear to me what this could be, but OK
Err(ReadlineError::Io(e)) => {
eprintln!("IO error: {}", e);
break;
}
// Control-D and Control-C
Err(ReadlineError::Eof) => break,
Err(ReadlineError::Interrupted) => break,
// For some reason this doesn't exist on Windows. I also don't quite know
// what would cause this, but ...
#[cfg(not(windows))]
Err(ReadlineError::Errno(e)) => {
eprintln!("Unknown syscall error: {}", e);
break;
}
// We don't actually do any reflow-ing if we change the terminal size,
// so we can just ignore this.
Err(ReadlineError::WindowResized) => continue,
// Why on earth are there so many error types?
Err(e) => {
eprintln!("Unknown internal error: {}", e);
break;

157
src/compiler.rs Normal file
View File

@@ -0,0 +1,157 @@
use crate::backend::Backend;
use crate::ir::Program as IR;
use crate::syntax::Program as Syntax;
use codespan_reporting::{
diagnostic::Diagnostic,
files::SimpleFiles,
term::{self, Config},
};
use pretty::termcolor::{ColorChoice, StandardStream};
use target_lexicon::Triple;
/// A high-level compiler for NGR programs.
///
/// This object can be built once, and then re-used many times to build multiple
/// files. For most users, the [`Default`] implementation should be sufficient;
/// it will use `stderr` for warnings and errors, with default colors based on
/// what we discover from the terminal. For those who want to provide alternate
/// outputs, though, the `Compiler::new` constructor is available.
pub struct Compiler {
    // Every file this compiler has seen, kept so diagnostics can cite spans.
    file_database: SimpleFiles<String, String>,
    // Where warnings and errors are written.
    console: StandardStream,
    // Rendering options (colors, styles) for diagnostics.
    console_config: Config,
}
impl Default for Compiler {
    /// Build a compiler that reports to `stderr`, auto-detecting whether the
    /// terminal supports color, with default diagnostic rendering.
    fn default() -> Self {
        Self::new(StandardStream::stderr(ColorChoice::Auto), Config::default())
    }
}
impl Compiler {
    /// Create a new compiler object.
    ///
    /// This object can be re-used to compile as many files as you like.
    /// Use this function if you want to control the output console and its
    /// configuration yourself; otherwise the `Default` implementation gives
    /// you `stderr` with reasonable settings.
    pub fn new(console: StandardStream, console_config: Config) -> Self {
        Compiler {
            file_database: SimpleFiles::new(),
            console,
            console_config,
        }
    }

    /// Compile the given file, returning the object file as a vector of bytes.
    ///
    /// This function may create output, via the console configured with this
    /// `Compiler` object. If the compilation fails for any reason, returns
    /// `None`.
    pub fn compile<P: AsRef<str>>(&mut self, input_file: P) -> Option<Vec<u8>> {
        match self.compile_internal(input_file.as_ref()) {
            Ok(result) => result,
            Err(error) => {
                self.emit(error.into());
                None
            }
        }
    }

    /// The actual meat of the compilation chain, hidden from the user because
    /// the type is kind of unpleasant.
    ///
    /// There are three possible outcomes:
    ///
    /// * Fundamental errors (bad file, IO oddities) return `Err`.
    /// * Validation errors — the program is semantically wrong — return
    ///   `Ok(None)`, because the diagnostics have already been emitted.
    /// * Success returns `Ok(Some(bytes))`, the compiled object file.
    fn compile_internal(&mut self, input_file: &str) -> Result<Option<Vec<u8>>, CompilerError> {
        // Parse the file into our syntax AST; a parse failure is fatal and
        // propagates as `Err` via `?`.
        let syntax = Syntax::parse_file(&mut self.file_database, input_file)?;
        // Validate the AST. Warnings are advisory; errors mean we must stop
        // after reporting everything.
        let (errors, warnings) = syntax.validate();
        let had_errors = !errors.is_empty();
        let messages = errors
            .into_iter()
            .map(Into::into)
            .chain(warnings.into_iter().map(Into::into));
        // Show the user every message we collected — warnings *and* errors.
        for message in messages {
            self.emit(message);
        }
        // Errors already went to the console, so there's nothing further to
        // say: `Ok(None)` rather than another `Err`.
        if had_errors {
            return Ok(None);
        }
        // Lower the validated syntax into IR, then hand it to Cranelift to
        // produce an object file for the host machine.
        let ir = IR::from(syntax);
        let mut backend = Backend::object_file(Triple::host())?;
        backend.compile_function("gogogo", ir)?;
        Ok(Some(backend.bytes()?))
    }

    /// Emit a diagnostic to the configured console.
    ///
    /// Just a shorthand that saves a pile of boilerplate elsewhere in the
    /// object.
    fn emit(&mut self, diagnostic: Diagnostic<usize>) {
        term::emit(
            &mut self.console.lock(),
            &self.console_config,
            &self.file_database,
            &diagnostic,
        )
        .expect("codespan reporting term::emit works");
    }
}
// This is just a handy type that we can convert things into; it's not
// exposed outside this module, and doesn't actually do much of interest.
// Each variant wraps the error from one stage of the pipeline.
#[derive(Debug, thiserror::Error)]
enum CompilerError {
    /// Code generation failed in the Cranelift backend.
    #[error(transparent)]
    Backend(#[from] crate::backend::BackendError),
    /// The input file failed to parse.
    #[error(transparent)]
    ParserError(#[from] crate::syntax::ParserError),
    /// Reading the source file (or other IO) failed.
    #[error(transparent)]
    IoError(#[from] std::io::Error),
    /// Cranelift could not serialize the compiled module.
    #[error(transparent)]
    WriteError(#[from] cranelift_object::object::write::Error),
}
// Since we're going to use codespan to report pretty much all errors,
// this just passes through most of the errors, or makes simple versions
// of `Diagnostic` for those that we don't have existing `From`s.
impl From<CompilerError> for Diagnostic<usize> {
fn from(value: CompilerError) -> Self {
match value {
CompilerError::Backend(be) => be.into(),
CompilerError::ParserError(pe) => (&pe).into(),
CompilerError::IoError(e) => {
Diagnostic::error().with_message(format!("IO error: {}", e))
}
CompilerError::WriteError(e) => {
Diagnostic::error().with_message(format!("Module write error: {}", e))
}
}
}
}

View File

@@ -1,3 +1,38 @@
//! Helpful functions for evaluating NGR programs.
//!
//! Look, this is a compiler, and so you might be asking why it has a bunch of
//! stuff in it to help with writing interpreters. Well, the answer is simple:
//! testing. It's really nice to know that if you start with a program that
//! does a thing, and then you muck with it, you end up with a program that does
//! the exact same thing. If you talk to people who think about language
//! semantics, they'll call this "observational equivalence": maybe the two
//! programs don't do 100% the same things in the same order, but you shouldn't
//! be able to observe the difference ... at least, not without a stopwatch,
//! memory profilers, etc.
//!
//! The actual evaluators for our various syntaxes are hidden in `eval` functions
//! of the various ASTs. It's nice to have them "next to" the syntax that way, so
//! that we just edit stuff in one part of the source tree at a time. This module,
//! then, just contains some things that are generally helpful across all the
//! interpreters we've written.
//!
//! In particular, this module helps with:
//!
//! * Defining a common error type -- [`EvalError`] -- that we can reasonably
//! compare. It's nice to compare errors, here, because we want to know that
//! if a program used to fail, it will still fail after we change it, and
//! fail in the exact same way.
//! * Defining a notion of a binding environment: [`EvalEnvironment`]. This
//! will help us keep track of variables bound in our program, as we run it.
//! * Defining a notion of a runtime value: [`Value`]. Yes, this is the
//! umpteenth time that we're re-defining basically the same enumeration
//! with exactly the same name, but it's nice to have it separated so that
//! we don't confuse them.
//! * Finally, this module implements all of our primitive functions, as the
//! [`Value::calculate`] function. This is just a nice abstraction boundary,
//! because the implementation of some parts of these primitives is really
//! awful to look at.
//!
mod env;
mod primop;
mod value;
@@ -9,6 +44,13 @@ pub use value::Value;
use crate::backend::BackendError;
/// All of the errors that can happen trying to evaluate an NGR program.
///
/// This is yet another standard [`thiserror::Error`] type, but with the
/// caveat that it implements [`PartialEq`] even though some of its
/// constituent members don't. It does so through the very sketchy mechanism
/// of converting those errors to strings and then seeing if they're the
/// same.
#[derive(Debug, thiserror::Error)]
pub enum EvalError {
#[error(transparent)]
@@ -18,15 +60,15 @@ pub enum EvalError {
#[error(transparent)]
Backend(#[from] BackendError),
#[error("IO error: {0}")]
IO(String),
IO(#[from] std::io::Error),
#[error(transparent)]
Module(#[from] ModuleError),
}
impl From<std::io::Error> for EvalError {
fn from(value: std::io::Error) -> Self {
EvalError::IO(value.to_string())
}
#[error("Linker error: {0}")]
Linker(String),
#[error("Program exitted with status {0}")]
ExitCode(std::process::ExitStatus),
#[error("Unexpected output at runtime: {0}")]
RuntimeOutput(String),
}
impl PartialEq for EvalError {
@@ -48,7 +90,7 @@ impl PartialEq for EvalError {
},
EvalError::IO(a) => match other {
EvalError::IO(b) => a == b,
EvalError::IO(b) => a.to_string() == b.to_string(),
_ => false,
},
@@ -56,6 +98,21 @@ impl PartialEq for EvalError {
EvalError::Module(b) => a.to_string() == b.to_string(),
_ => false,
},
EvalError::Linker(a) => match other {
EvalError::Linker(b) => a == b,
_ => false,
},
EvalError::ExitCode(a) => match other {
EvalError::ExitCode(b) => a == b,
_ => false,
},
EvalError::RuntimeOutput(a) => match other {
EvalError::RuntimeOutput(b) => a == b,
_ => false,
},
}
}
}

View File

@@ -2,15 +2,28 @@ use crate::eval::Value;
use internment::ArcIntern;
use std::sync::Arc;
/// An evaluation environment, which maps variable names to their
/// current values.
///
/// One key difference between `EvalEnvironment` and `HashMap` is that
/// `EvalEnvironment` uses an `extend` mechanism to add keys, rather
/// than an `insert`. This difference allows you to add mappings for
/// a subcomputation while still retaining the old version without those
/// keys, which is really handy for implementing variable scoping.
pub struct EvalEnvironment {
    // The mappings themselves, stored as an immutable chain of
    // (name, value) links; sharing the tail via `Arc` is what makes
    // `extend` cheap and non-destructive.
    inner: Arc<EvalEnvInternal>,
}
pub enum EvalEnvInternal {
enum EvalEnvInternal {
Empty,
Value(ArcIntern<String>, Value, Arc<EvalEnvInternal>),
}
/// Errors that can happen when looking up a variable.
///
/// This enumeration may be extended in the future, depending on if we
/// get more subtle with our keys. But for now, this is just a handy
/// way to make lookup failures be `thiserror::Error`s.
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
pub enum LookupError {
#[error("Could not find variable '{0}' in environment")]
@@ -24,28 +37,38 @@ impl Default for EvalEnvironment {
}
impl EvalEnvironment {
    /// Create a new environment containing no bindings at all.
    pub fn empty() -> Self {
        Self {
            inner: Arc::new(EvalEnvInternal::Empty),
        }
    }

    /// Extend the environment with a new mapping.
    ///
    /// Note the types: this returns a *new* `EvalEnvironment` containing the
    /// extra binding, while `self` is left untouched — which is exactly what
    /// variable scoping wants.
    pub fn extend(&self, name: ArcIntern<String>, value: Value) -> Self {
        let chained = EvalEnvInternal::Value(name, value, Arc::clone(&self.inner));
        Self {
            inner: Arc::new(chained),
        }
    }

    /// Look up a variable in the environment, returning an error if it isn't there.
    pub fn lookup(&self, n: ArcIntern<String>) -> Result<Value, LookupError> {
        self.inner.lookup(n)
    }
}
impl EvalEnvInternal {
/// Look up a variable in the environment, returning an error if it isn't there.
fn lookup(&self, n: ArcIntern<String>) -> Result<Value, LookupError> {
match self {
// if this is an empty dictionary, never mind, couldn't find it
EvalEnvInternal::Empty => Err(LookupError::CouldNotFind(n)),
// is this the key we have right here? if yes, return our value
EvalEnvInternal::Value(name, value, _) if *name == n => Ok(value.clone()),
// otherwise, recurse up our chain of environments
EvalEnvInternal::Value(_, _, rest) => rest.lookup(n),
}
}
@@ -70,6 +93,9 @@ mod tests {
assert!(tester.lookup(arced("baz")).is_err());
}
// added this test to make sure that our nesting property works propertly.
// it's not a big deal now, but it'll be really handy later when we add any
// kind of variable scoping.
#[test]
fn nested() {
let tester = EvalEnvironment::default();

View File

@@ -1,19 +1,39 @@
use crate::eval::value::Value;
/// Errors that can occur running primitive operations in the evaluators.
#[derive(Clone, Debug, PartialEq, thiserror::Error)]
pub enum PrimOpError {
    /// Checked arithmetic failed: the operation would overflow or underflow
    /// the underlying machine type. The payload names the operator that
    /// failed.
    #[error("Math error (underflow or overflow) computing {0} operator")]
    MathFailure(&'static str),
    /// This particular variant covers the case in which a primitive
    /// operator takes two arguments that are supposed to be the same,
    /// but they differ. (So, like, all the math operators.)
    #[error("Type mismatch ({1} vs {2}) computing {0} operator")]
    TypeMismatch(String, Value, Value),
    /// This variant covers when an operator must take a particular
    /// type, but the user has provided a different one.
    #[error("Bad type for operator {0}: {1}")]
    BadTypeFor(&'static str, Value),
    /// Probably obvious from the name, but just to be very clear: this
    /// happens when you pass three arguments to a two argument operator,
    /// etc. Technically that's a type error of some sort, but we split
    /// it out.
    #[error("Illegal number of arguments for {0}: {1} arguments found")]
    BadArgCount(String, usize),
    /// The named operator isn't one the evaluator knows about at all.
    #[error("Unknown primitive operation {0}")]
    UnknownPrimOp(String),
}
// Implementing primitives in an interpreter like this is *super* tedious,
// and the only way to make it even somewhat manageable is to use macros.
// This particular macro works for binary operations, and assumes that
// you've already worked out that the `calculate` call provided two arguments.
//
// In those cases, it will rul the operations we know about, and error if
// it doesn't.
//
// This macro then needs to be instantiated for every type, which is super
// fun.
macro_rules! run_op {
($op: ident, $left: expr, $right: expr) => {
match $op {
@@ -23,15 +43,15 @@ macro_rules! run_op {
.map(Into::into),
"-" => $left
.checked_sub($right)
.ok_or(PrimOpError::MathFailure("+"))
.ok_or(PrimOpError::MathFailure("-"))
.map(Into::into),
"*" => $left
.checked_mul($right)
.ok_or(PrimOpError::MathFailure("+"))
.ok_or(PrimOpError::MathFailure("*"))
.map(Into::into),
"/" => $left
.checked_div($right)
.ok_or(PrimOpError::MathFailure("+"))
.ok_or(PrimOpError::MathFailure("/"))
.map(Into::into),
_ => Err(PrimOpError::UnknownPrimOp($op.to_string())),
}
@@ -41,6 +61,8 @@ macro_rules! run_op {
impl Value {
fn binary_op(operation: &str, left: &Value, right: &Value) -> Result<Value, PrimOpError> {
match left {
// for now we only have one type, but in the future this is
// going to be very irritating.
Value::I64(x) => match right {
Value::I64(y) => run_op!(operation, x, *y),
// _ => Err(PrimOpError::TypeMismatch(
@@ -52,6 +74,14 @@ impl Value {
}
}
/// Calculate the result of running the given primitive on the given arguments.
///
/// This can cause errors in a whole mess of ways, so be careful about your
/// inputs. For example, addition only works when the two values have the exact
/// same type, so expect an error if you try to do so. In addition, this
/// implementation catches and raises an error on overflow or underflow, so
/// its worth being careful to make sure that your inputs won't cause either
/// condition.
pub fn calculate(operation: &str, values: Vec<Value>) -> Result<Value, PrimOpError> {
if values.len() == 2 {
Value::binary_op(operation, &values[0], &values[1])

View File

@@ -1,5 +1,10 @@
use std::fmt::Display;
/// Values in the interpreter.
///
/// Yes, this is yet another definition of a structure called `Value`, which
/// are almost entirely identical. However, it's nice to have them separated
/// by type so that we don't mix them up.
#[derive(Clone, Debug, PartialEq)]
pub enum Value {
I64(i64),

View File

@@ -1,3 +1,17 @@
//! The middle of the compiler: analysis, simplification, optimization.
//!
//! For the moment, this module doesn't do much besides define an intermediate
//! representation for NGR programs that is a little easier to work with then
//! the structures we've built from the actual user syntax. For example, in the
//! IR syntax, function calls are simplified so that all their arguments are
//! either variables or constants, which can make reasoning about programs
//! (and implicit temporary variables) quite a bit easier.
//!
//! For the foreseeable future, this module will likely remain mostly empty
//! besides definitions, as we'll likely want to focus on just processing /
//! validating syntax, and then figuring out how to turn it into Cranelift
//! and object code. After that point, however, this will be the module to
//! come to for analysis and optimization work.
mod ast;
mod eval;
mod from_syntax;

View File

@@ -1,3 +1,4 @@
use crate::syntax::Location;
use internment::ArcIntern;
use pretty::{DocAllocator, Pretty};
use proptest::{
@@ -5,13 +6,28 @@ use proptest::{
strategy::{BoxedStrategy, Strategy},
};
use crate::syntax::Location;
/// We're going to represent variables as interned strings.
///
/// These should be fast enough for comparison that it's OK, since it's going to end up
/// being pretty much the pointer to the string.
type Variable = ArcIntern<String>;
/// The representation of a program within our IR. For now, this is exactly one file.
///
/// In addition, for the moment there's not really much of interest to hold here besides
/// the list of statements read from the file. Order is important. In the future, you
/// could imagine caching analysis information in this structure.
///
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
/// to print the structure whenever possible, especially if you value your or your
/// user's time. The latter is useful for testing that conversions of `Program` retain
/// their meaning. All `Program`s generated through [`Arbitrary`] are guaranteed to be
/// syntactically valid, although they may contain runtime issue like over- or underflow.
#[derive(Debug)]
pub struct Program {
pub statements: Vec<Statement>,
// For now, a program is just a vector of statements. In the future, we'll probably
// extend this to include a bunch of other information, but for now: just a list.
pub(crate) statements: Vec<Statement>,
}
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
@@ -23,6 +39,8 @@ where
let mut result = allocator.nil();
for stmt in self.statements.iter() {
// there's probably a better way to do this, rather than constantly
// adding to the end, but this works.
result = result
.append(stmt.pretty(allocator))
.append(allocator.text(";"))
@@ -39,11 +57,21 @@ impl Arbitrary for Program {
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
crate::syntax::Program::arbitrary_with(args)
.prop_map(|x| Program::from(x.simplify()))
.prop_map(Program::from)
.boxed()
}
}
/// The representation of a statement in the language.
///
/// For now, this is either a binding site (`x = 4`) or a print statement
/// (`print x`). Someday, though, more!
///
/// As with `Program`, this type implements [`Pretty`], which should
/// be used to display the structure whenever possible. It does not
/// implement [`Arbitrary`], though, mostly because it's slightly
/// complicated to do so.
///
#[derive(Debug)]
pub enum Statement {
Binding(Location, Variable, Expression),
@@ -71,6 +99,18 @@ where
}
}
/// The representation of an expression.
///
/// Note that expressions, like everything else in this syntax tree,
/// supports [`Pretty`], and it's strongly encouraged that you use
/// that trait/module when printing these structures.
///
/// Also, Expressions at this point in the compiler are explicitly
/// defined so that they are *not* recursive. By this point, if an
/// expression requires some other data (like, for example, invoking
/// a primitive), any subexpressions have been bound to variables so
/// that the referenced data will always either be a constant or a
/// variable reference.
#[derive(Debug)]
pub enum Expression {
Value(Location, Value),
@@ -107,6 +147,12 @@ where
}
}
/// A type representing the primitives allowed in the language.
///
/// Having this as an enumeration avoids a lot of "this should not happen"
/// cases, but might prove to be cumbersome in the future. If that happens,
/// this may either become a more hierarchical enumeration, or we'll just
/// deal with the "this should not happen" cases.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Primitive {
Plus,
@@ -144,6 +190,11 @@ where
}
}
/// An expression that is always either a value or a reference.
///
/// This is the type used to guarantee that we don't nest expressions
/// at this level. Instead, expressions that take arguments take one
/// of these, which can only be a constant or a reference.
#[derive(Debug)]
pub enum ValueOrRef {
Value(Location, Value),
@@ -163,8 +214,23 @@ where
}
}
impl From<ValueOrRef> for Expression {
    /// Lift a restricted `ValueOrRef` into the general `Expression` type.
    ///
    /// This is always lossless: the two `ValueOrRef` cases map one-to-one
    /// onto the corresponding `Expression` constructors.
    fn from(value: ValueOrRef) -> Self {
        match value {
            ValueOrRef::Value(location, constant) => Expression::Value(location, constant),
            ValueOrRef::Ref(location, variable) => Expression::Reference(location, variable),
        }
    }
}
/// A constant in the IR.
#[derive(Debug)]
pub enum Value {
    /// A numerical constant.
    ///
    /// The optional argument is the base that was used by the user to input
    /// the number. By retaining it, we can ensure that if we need to print the
    /// number back out, we can do so in the form that the user entered it.
    // NOTE(review): `None` presumably means "no explicit base recorded"
    // (i.e. plain decimal) — confirm against the parser that produces these.
    Number(Option<u8>, i64),
}

View File

@@ -4,6 +4,10 @@ use crate::ir::{Expression, Program, Statement};
use super::{Primitive, ValueOrRef};
impl Program {
/// Evaluate the program, returning either an error or a string containing everything
/// the program printed out.
///
/// The print outs will be newline separated, with one print out per line.
pub fn eval(&self) -> Result<String, EvalError> {
let mut env = EvalEnvironment::empty();
let mut stdout = String::new();
@@ -39,6 +43,9 @@ impl Expression {
Expression::Primitive(_, op, args) => {
let mut arg_values = Vec::with_capacity(args.len());
// we implement primitive operations by first evaluating each of the
// arguments to the function, and then gathering up all the values
// produced.
for arg in args.iter() {
match arg {
ValueOrRef::Ref(_, n) => arg_values.push(env.lookup(n.clone())?),
@@ -48,6 +55,8 @@ impl Expression {
}
}
// and then finally we call `calculate` to run them. trust me, it's nice
// to not have to deal with all the nonsense hidden under `calculate`.
match op {
Primitive::Plus => Ok(Value::calculate("+", arg_values)?),
Primitive::Minus => Ok(Value::calculate("-", arg_values)?),
@@ -62,7 +71,7 @@ impl Expression {
#[test]
fn two_plus_three() {
let input = crate::syntax::Program::parse(0, "x = 2 + 3; print x;").expect("parse works");
let ir = Program::from(input.simplify());
let ir = Program::from(input);
let output = ir.eval().expect("runs successfully");
assert_eq!("x = 5i64\n", &output);
}
@@ -71,7 +80,7 @@ fn two_plus_three() {
fn lotsa_math() {
let input =
crate::syntax::Program::parse(0, "x = 2 + 3 * 10 / 5 - 1; print x;").expect("parse works");
let ir = Program::from(input.simplify());
let ir = Program::from(input);
let output = ir.eval().expect("runs successfully");
assert_eq!("x = 7i64\n", &output);
}

View File

@@ -1,82 +1,185 @@
use internment::ArcIntern;
use std::sync::atomic::AtomicUsize;
use crate::ir::ast as ir;
use crate::syntax::ast as syntax;
use crate::syntax;
use super::ValueOrRef;
impl From<syntax::Program> for ir::Program {
/// We implement the top-level conversion of a syntax::Program into an
/// ir::Program using just the standard `From::from`, because we don't
/// need to return any arguments and we shouldn't produce any errors.
/// Technically there's an `unwrap` deep under the hood that we could
/// float out, but the validator really should've made sure that never
/// happens, so we're just going to assume.
fn from(mut value: syntax::Program) -> Self {
ir::Program {
statements: value.statements.drain(..).map(Into::into).collect(),
let mut statements = Vec::new();
for stmt in value.statements.drain(..) {
statements.append(&mut stmt.simplify());
}
ir::Program { statements }
}
}
impl From<Vec<syntax::Statement>> for ir::Program {
    /// Build an IR program directly from a list of syntax statements,
    /// converting each statement in order.
    fn from(statements: Vec<syntax::Statement>) -> Self {
        let converted = statements.into_iter().map(Into::into).collect();
        ir::Program {
            statements: converted,
        }
    }
}
impl From<syntax::Statement> for ir::Statement {
impl From<syntax::Statement> for ir::Program {
/// One interesting thing about this conversion is that there isn't
/// a natural translation from syntax::Statement to ir::Statement,
/// because the syntax version can have nested expressions and the
/// IR version can't.
///
/// As a result, we can naturally convert a syntax::Statement into
/// an ir::Program, because we can allow the additional binding
/// sites to be generated, instead. And, bonus, it turns out that
/// this is what we wanted anyways.
fn from(value: syntax::Statement) -> Self {
match value {
syntax::Statement::Binding(loc, name, expr) => {
ir::Statement::Binding(loc, ArcIntern::from(name), ir::Expression::from(expr))
}
syntax::Statement::Print(loc, name) => ir::Statement::Print(loc, ArcIntern::from(name)),
ir::Program {
statements: value.simplify(),
}
}
}
impl From<syntax::Expression> for ir::Expression {
fn from(value: syntax::Expression) -> Self {
match value {
syntax::Expression::Primitive(loc, name, mut exprs) => ir::Expression::Primitive(
impl syntax::Statement {
/// Simplify a syntax::Statement into a series of ir::Statements.
///
/// The reason this function is one-to-many is because we may have to
/// introduce new binding sites in order to avoid having nested
/// expressions. Nested expressions, like `(1 + 2) * 3`, are allowed
/// in syntax::Expression but are expressly *not* allowed in
/// ir::Expression. So this pass converts them into bindings, like
/// this:
///
/// x = (1 + 2) * 3;
///
/// ==>
///
/// x:1 = 1 + 2;
/// x:2 = x:1 * 3;
/// x = x:2
///
/// Thus ensuring that things are nice and simple. Note that the
/// binding of `x:2` is not, strictly speaking, necessary, but it
/// makes the code below much easier to read.
fn simplify(self) -> Vec<ir::Statement> {
let mut new_statements = vec![];
match self {
// Print statements we don't have to do much with
syntax::Statement::Print(loc, name) => {
new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name)))
}
// Bindings, however, may involve a single expression turning into
// a series of statements and then an expression.
syntax::Statement::Binding(loc, name, value) => {
let (mut prereqs, new_value) = value.rebind(&name);
new_statements.append(&mut prereqs);
new_statements.push(ir::Statement::Binding(
loc,
ir::Primitive::try_from(name.as_str()).unwrap(),
exprs.drain(..).map(Into::into).collect(),
),
syntax::Expression::Reference(loc, name) => {
ir::Expression::Reference(loc, ArcIntern::from(name))
}
syntax::Expression::Value(loc, value) => {
ir::Expression::Value(loc, ir::Value::from(value))
ArcIntern::new(name),
new_value.into(),
))
}
}
new_statements
}
}
impl From<syntax::Expression> for ir::ValueOrRef {
fn from(value: syntax::Expression) -> Self {
match value {
syntax::Expression::Primitive(loc, _, _) => {
panic!("{:?}: couldn't convert to valueorref", loc)
impl syntax::Expression {
/// This actually does the meat of the simplification work, here, by rebinding
/// any nested expressions into their own variables. We have this return
/// `ValueOrRef` in all cases because it makes for slightly less code; in the
/// case when we actually want an `Expression`, we can just use `into()`.
fn rebind(self, base_name: &str) -> (Vec<ir::Statement>, ir::ValueOrRef) {
match self {
// Values just convert in the obvious way, and require no prereqs
syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())),
// Similarly, references just convert in the obvious way, and require
// no prereqs
syntax::Expression::Reference(loc, name) => {
(vec![], ValueOrRef::Ref(loc, ArcIntern::new(name)))
}
syntax::Expression::Reference(loc, var) => {
ir::ValueOrRef::Ref(loc, ArcIntern::new(var))
// Primitive expressions are where we do the real work.
syntax::Expression::Primitive(loc, prim, mut expressions) => {
// generate a fresh new name for the binding site we're going to
// introduce, basing the name on wherever we came from; so if this
// expression was bound to `x` originally, it might become `x:23`.
//
// gensym is guaranteed to give us a name that is unused anywhere
// else in the program.
let new_name = gensym(base_name);
let mut prereqs = Vec::new();
let mut new_exprs = Vec::new();
// here we loop through every argument, and recurse on the expressions
// we find. that will give us any new binding sites that *they* introduce,
// and a simple value or reference that we can use in our result.
for expr in expressions.drain(..) {
let (mut cur_prereqs, arg) = expr.rebind(new_name.as_str());
prereqs.append(&mut cur_prereqs);
new_exprs.push(arg);
}
syntax::Expression::Value(loc, val) => ir::ValueOrRef::Value(loc, val.into()),
// now we're going to use those new arguments to run the primitive, binding
// the results to the new variable we introduced.
let prim =
ir::Primitive::try_from(prim.as_str()).expect("is valid primitive function");
prereqs.push(ir::Statement::Binding(
loc.clone(),
new_name.clone(),
ir::Expression::Primitive(loc.clone(), prim, new_exprs),
));
// and finally, we can return all the new bindings, and a reference to
// the variable we just introduced to hold the value of the primitive
// invocation.
(prereqs, ValueOrRef::Ref(loc, new_name))
}
}
}
}
impl From<syntax::Value> for ir::Value {
fn from(x: syntax::Value) -> Self {
match x {
syntax::Value::Number(base, value) => ir::Value::Number(base, value),
fn from(value: syntax::Value) -> Self {
match value {
syntax::Value::Number(base, val) => ir::Value::Number(base, val),
}
}
}
impl From<String> for ir::Primitive {
    /// Convert a primitive's name (e.g. an operator string) into the IR
    /// primitive it denotes.
    ///
    /// # Panics
    ///
    /// Panics if the string is not a valid primitive name. The validator
    /// is expected to have rejected any program containing an unknown
    /// primitive before we get here, so a panic indicates a compiler bug.
    fn from(value: String) -> Self {
        value
            .try_into()
            .expect("validator guarantees primitive names are valid")
    }
}
/// Generate a fresh new name based on the given name.
///
/// The new name is guaranteed to be unique across the entirety of the
/// execution. This is achieved by using characters in the variable name
/// that would not be valid input (the angle brackets and colon), and by
/// including a counter that is incremented on every invocation.
fn gensym(name: &str) -> ArcIntern<String> {
    // A process-wide counter. We only need each fetch_add to yield a
    // distinct value; there is no other memory this needs to synchronize
    // with, so `Relaxed` ordering is sufficient (the increment itself is
    // still atomic and therefore unique).
    static COUNTER: AtomicUsize = AtomicUsize::new(0);
    let id = COUNTER.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    ArcIntern::new(format!("<{}:{}>", name, id))
}
proptest::proptest! {
#[test]
fn translation_maintains_semantics(input: syntax::Program) {
let syntax_result = input.eval();
let ir = ir::Program::from(input.simplify());
let ir = ir::Program::from(input);
let ir_result = ir.eval();
assert_eq!(syntax_result, ir_result);
}

View File

@@ -3,6 +3,10 @@ use internment::ArcIntern;
use std::collections::HashSet;
impl Program {
/// Get the complete list of strings used within the program.
///
/// For the purposes of this function, strings are the variables used in
/// `print` statements.
pub fn strings(&self) -> HashSet<ArcIntern<String>> {
let mut result = HashSet::new();

View File

@@ -1,4 +1,75 @@
//! # NGR (No Good Reason) Compiler
//!
//! This is the top-level module for the NGR compiler; a compiler written
//! in Rust for no good reason. I may eventually try to turn this into a
//! basic guide for writing compilers, but for now it's a fairly silly
//! (although complete) language and implementation, featuring:
//!
//! * Variable binding with basic arithmetic operators.
//! * The ability to print variable values.
//!
//! I'll be extending this list into the future, with the eventual goal of
//! being able to implement basic programming tasks with it. For example,
//! I have a goal of eventually writing reasonably-clear
//! [Advent of Code](https://adventofcode.com/) implementations with it.
//!
//! Users of this as a library will want to choose their adventure based
//! on how much they want to customize their experience; I've defaulted
//! to providing the ability to see internals, rather than masking them,
//! so folks can play with things as they see fit.
//!
//! ## Easy Mode - Just Running a REPL or Compiler
//!
//! For easiest use, you will want to use either the [`Compiler`] object
//! or the [`REPL`] object.
//!
//! As you might expect, the [`Compiler`] object builds a compiler, which
//! can be re-used to compile as many files as you'd like. Right now,
//! that's all it does. (TODO: Add a linker function to it.)
//!
//! The [`REPL`] object implements the core of what you'll need to
//! implement a just-in-time compiled read-eval-print loop. It will
//! maintain variable state and make sure that variables are linked
//! appropriately as the loop progresses.
//!
//! ## Hard Mode - Looking at the individual passes
//!
//! This compiler is broken into three core parts:
//!
//! 1. The front-end / syntax engine. This portion of the compiler is
//! responsible for turning basic strings (or files) into a machine-
//! friendly abstract syntax tree. See the [`syntax`] module for
//! more information.
//! 2. The IR. This portion of the compiler will be responsible for
//! high-level code analysis and transformation ... although for
//! now, it doesn't do much at all. See the [`ir`] module for more
//! information.
//! 3. The Backend implementation. This portion of the compiler turns
//! the IR from the previous section into Cranelift structures, and
//! helps with either compiling them via JIT or statically compiling
//! them into a file. The [`backend`] module also contains information
//! about the runtime functions made available to the user.
//!
//! ## Testing
//!
//! Testing is a key focus of this effort. To that end, both the syntax
//! tree used in the syntax module and the IR used in the middle of the
//! compiler implement `Arbitrary`, and are subject to property-based
//! testing to make sure that various passes work properly.
//!
//! In addition, to support basic equivalence testing, we include support
//! for evaluating all expressions. The [`eval`] module provides some
//! utility support for this work.
//!
pub mod backend;
pub mod eval;
pub mod ir;
pub mod syntax;
/// Implementation module for the high-level compiler.
mod compiler;
/// Implementation module for the high-level REPL.
mod repl;
pub use crate::compiler::Compiler;
pub use crate::repl::REPL;

166
src/repl.rs Normal file
View File

@@ -0,0 +1,166 @@
use crate::backend::{Backend, BackendError};
use crate::ir::Program as IR;
use crate::syntax::{Location, ParserError, Statement};
use codespan_reporting::diagnostic::Diagnostic;
use codespan_reporting::files::SimpleFiles;
use codespan_reporting::term::{self, Config};
use cranelift_jit::JITModule;
use cranelift_module::ModuleError;
use pretty::termcolor::{ColorChoice, StandardStream};
use std::collections::HashMap;
/// A high-level REPL helper for NGR.
///
/// This object holds most of the state required to implement some
/// form of interactive compiler for NGR; all you need to do is provide
/// the actual user IO.
///
/// For most console-based use cases, the [`Default`] implementation
/// should be sufficient; it prints any warnings or errors to `stdout`,
/// using a default color scheme that should work based on the terminal
/// type. For more complex interactions, though, you may want to use
/// the `REPL::new` function to provide your own print substrate.
pub struct REPL {
    // Stores every input line so diagnostics can reference source spans.
    file_database: SimpleFiles<String, String>,
    // The JIT backend used to compile and run each statement in memory.
    jitter: Backend<JITModule>,
    // Tracks where each variable was first bound, for validation and
    // so we only define each variable with the backend once.
    variable_binding_sites: HashMap<String, Location>,
    // Where diagnostics are emitted, and how they're formatted.
    console: StandardStream,
    console_config: Config,
}
impl Default for REPL {
    /// Build a REPL that reports to `stdout`, auto-detecting color support.
    ///
    /// # Panics
    ///
    /// Panics if the JIT backend cannot be initialized. If you need to
    /// handle that failure gracefully, call [`REPL::new`] directly.
    fn default() -> Self {
        let console = StandardStream::stdout(ColorChoice::Auto);
        // `expect` rather than `unwrap` so a failed JIT setup produces an
        // actionable message instead of a bare panic.
        REPL::new(console, Config::default())
            .expect("failed to initialize the JIT backend for the default REPL")
    }
}
/// The set of errors that can occur while processing a single REPL input.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, thiserror::Error)]
enum REPLError {
    /// The input line failed to lex or parse.
    #[error("Error parsing statement: {0}")]
    Parser(#[from] ParserError),
    /// The backend failed while defining symbols or compiling the statement.
    #[error("JIT error: {0}")]
    JIT(#[from] BackendError),
    /// An error surfaced directly from cranelift's module machinery.
    #[error("Internal cranelift error: {0}")]
    Cranelift(#[from] ModuleError),
    /// The diagnostic-reporting machinery itself failed while emitting.
    #[error(transparent)]
    Reporting(#[from] codespan_reporting::files::Error),
}
impl From<REPLError> for Diagnostic<usize> {
    /// Turn a REPL error into a user-presentable diagnostic.
    ///
    /// Parser and JIT errors carry their own diagnostic conversions; the
    /// cranelift and reporting cases are internal failures, so we surface
    /// them as `bug` diagnostics with the underlying message.
    fn from(value: REPLError) -> Self {
        match value {
            REPLError::Parser(err) => Diagnostic::from(&err),
            REPLError::JIT(err) => Diagnostic::from(err),
            // `to_string()` over `format!("{}", ..)` — same output, idiomatic.
            REPLError::Cranelift(err) => Diagnostic::bug().with_message(err.to_string()),
            REPLError::Reporting(err) => Diagnostic::bug().with_message(err.to_string()),
        }
    }
}
impl REPL {
    /// Construct a new REPL helper, using the given stream implementation and console configuration.
    ///
    /// For most users, the [`Default::default`] implementation will be sufficient;
    /// it will use `stdout` and a default console configuration. But if you need to
    /// be more specific, this will help you provide more guidance to the REPL as it
    /// evaluates things.
    ///
    /// Returns an error if the JIT backend cannot be initialized.
    pub fn new(console: StandardStream, console_config: Config) -> Result<Self, BackendError> {
        Ok(REPL {
            file_database: SimpleFiles::new(),
            jitter: Backend::jit(None)?,
            variable_binding_sites: HashMap::new(),
            console,
            console_config,
        })
    }

    /// Emit a diagnostic to the configured console.
    ///
    /// This is just a convenience function; there's a lot of boilerplate in printing
    /// diagnostics, and it was nice to pull it out into its own function.
    fn emit_diagnostic(
        &mut self,
        diagnostic: Diagnostic<usize>,
    ) -> Result<(), codespan_reporting::files::Error> {
        term::emit(
            &mut self.console,
            &self.console_config,
            &self.file_database,
            &diagnostic,
        )
    }

    /// Process a line of input, printing any problems or the results.
    ///
    /// The line number argument is just for a modicum of source information, to
    /// provide to the user if some parsing or validation step fails. It can be
    /// changed to be any value you like that provides some insight into what
    /// failed, although it is probably a good idea for it to be different for
    /// every invocation of this function. (Not critical, but a good idea.)
    ///
    /// Any warnings or errors generated in processing this command will be
    /// printed to the configured console. If there are no problems, the
    /// command will be compiled and then executed.
    pub fn process_input(&mut self, line_no: usize, command: String) {
        if let Err(err) = self.process(line_no, command) {
            // The error itself is reported as a diagnostic; if even *that*
            // fails, all we can do is complain on stderr.
            if let Err(e) = self.emit_diagnostic(Diagnostic::from(err)) {
                eprintln!(
                    "WOAH! System having trouble printing error messages. This is very bad. ({})",
                    e
                );
            }
        }
    }

    /// The internal implementation, with a handy `Result` type.
    ///
    /// All information from the documentation of `REPL::process_input` applies here,
    /// as well; this is the internal implementation of that function, which is
    /// differentiated by returning a `Result` type that is hidden from the user
    /// in the case of `REPL::process_input`.
    fn process(&mut self, line_no: usize, command: String) -> Result<(), REPLError> {
        // Every input line is stored in the file database (always under the
        // name "entry") so diagnostics can point back into the user's input.
        // NOTE(review): entries accumulate for the lifetime of the REPL.
        let entry = self.file_database.add("entry".to_string(), command);
        let source = self
            .file_database
            .get(entry)
            .expect("entry exists")
            .source();
        let syntax = Statement::parse(entry, source)?;
        // if this is a variable binding, and we've never defined this variable before,
        // we should tell cranelift about it. this is optimistic; if we fail to compile,
        // then we won't use this definition until someone tries again.
        if let Statement::Binding(_, ref name, _) = syntax {
            if !self.variable_binding_sites.contains_key(name.as_str()) {
                self.jitter.define_string(name)?;
                self.jitter.define_variable(name.clone())?;
            }
        };
        // Report every validation error and warning to the console; errors
        // (but not warnings) stop us from compiling and running the line.
        let (mut errors, mut warnings) = syntax.validate(&mut self.variable_binding_sites);
        let stop = !errors.is_empty();
        let messages = errors
            .drain(..)
            .map(Into::into)
            .chain(warnings.drain(..).map(Into::into));
        for message in messages {
            self.emit_diagnostic(message)?;
        }
        if stop {
            return Ok(());
        }
        // Lower to IR, JIT-compile under a per-line function name, and run it.
        let ir = IR::from(syntax);
        let name = format!("line{}", line_no);
        let function_id = self.jitter.compile_function(&name, ir)?;
        self.jitter.module.finalize_definitions()?;
        let compiled_bytes = self.jitter.bytes(function_id);
        // SAFETY: this assumes `bytes` returns a pointer to finalized,
        // executable JIT code whose signature is `fn() -> ()`.
        // NOTE(review): confirm `Backend::bytes` guarantees this — a
        // signature mismatch here would be undefined behavior.
        let compiled_function = unsafe { std::mem::transmute::<_, fn() -> ()>(compiled_bytes) };
        compiled_function();
        Ok(())
    }
}

View File

@@ -1,12 +1,36 @@
//! NGR Parsing: Reading input, turning it into sense (or errors).
//!
//! This module implements the front end of the compiler, which is responsible for
//! reading in NGR syntax as a string, turning it into a series of reasonable Rust
//! structures for us to manipulate, and doing some validation while it's at it.
//!
//! The core flow for this work is:
//!
//! * Turning the string into a series of language-specific [`Token`]s.
//! * Taking those tokens, and computing a basic syntax tree from them,
//! using our parser ([`ProgramParser`] or [`StatementParser`], generated
//! by [`lalrpop`](https://lalrpop.github.io/lalrpop/)).
//! * Validating the tree we have parsed, using [`Program::validate`],
//! returning any warnings or errors we have found.
//!
//! In addition to all of this, we make sure that the structures defined in this
//! module are all:
//!
//! * Instances of [`Pretty`](::pretty::Pretty), so that you can print stuff back
//! out that can be read by a human.
//! * Instances of [`Arbitrary`](proptest::prelude::Arbitrary), so they can be
//! used in `proptest`-based property testing. There are built-in tests in
//! the library, for example, to make sure that the pretty-printing round-trips.
//! * Can be evaluated using an `eval` function, for comparison with later
//! versions of the function downstream.
use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles};
use lalrpop_util::lalrpop_mod;
use logos::Logos;
mod arbitrary;
pub mod ast;
mod ast;
mod eval;
mod location;
mod simplify;
mod tokens;
lalrpop_mod!(
#[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
@@ -18,7 +42,7 @@ mod validate;
pub use crate::syntax::ast::*;
pub use crate::syntax::location::Location;
use crate::syntax::parser::ProgramParser;
pub use crate::syntax::parser::{ProgramParser, StatementParser};
pub use crate::syntax::tokens::{LexerError, Token};
#[cfg(test)]
use ::pretty::{Arena, Pretty};
@@ -29,33 +53,62 @@ use proptest::{prop_assert, prop_assert_eq};
use std::str::FromStr;
use thiserror::Error;
use self::parser::StatementParser;
/// One of the many errors that can occur when processing text input.
///
/// If you get one of these and want to display it to the user, we strongly
/// suggest using the [`From`] implementation to turn this into a [`Diagnostic`],
/// and then printing it via [`codespan_reporting`].
#[derive(Debug, Error)]
pub enum ParserError {
    /// Raised by the lexer when we see some text that doesn't make
    /// any sense in the language.
    #[error("Invalid token")]
    InvalidToken(Location),
    /// Raised when we're parsing the file and run into an EOF in a
    /// place we really weren't expecting. The `Vec<String>` lists the
    /// token names the parser would have accepted instead.
    #[error("Unrecognized EOF")]
    UnrecognizedEOF(Location, Vec<String>),
    /// Raised when we're parsing the file, and run into a token in a
    /// place we weren't expecting it. Carries the token's start and end
    /// locations, the token itself, and what was expected instead.
    #[error("Unrecognized token")]
    UnrecognizedToken(Location, Location, Token, Vec<String>),
    /// Raised when we were expecting the end of the file, but instead
    /// got another token.
    #[error("Extra token")]
    ExtraToken(Location, Token, Location),
    /// Raised when the lexer just had some sort of internal problem
    /// and just gave up.
    #[error("Lexing failure")]
    LexFailure(Location),
    /// Raised when we tried to reference a file, or add a file, to our
    /// file database, and the database ran into a problem.
    #[error("File database error")]
    FileDatabaseError(#[from] codespan_reporting::files::Error),
    /// Raised when the OS is having problems giving us data.
    #[error("Read error")]
    ReadError(#[from] std::io::Error),
}
impl ParserError {
/// Convert one of lalrpop's parser errors into one of our own, which we can more
/// easily implement translation into [`Diagnostic`].
///
/// This function is relatively straightforward, because we match the errors pretty
/// closely. The major thing we do here is convert [`lalrpop`]'s notion of a location,
/// which is just an offset that it got from the lexer, into an actual location that
/// we can use in our [`Diagnostic`]s.
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
match err {
ParseError::InvalidToken { location } => {
ParserError::InvalidToken(Location::new(file_idx, location))
}
ParseError::UnrecognizedEOF { location, expected } => {
ParseError::UnrecognizedEof { location, expected } => {
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
}
ParseError::UnrecognizedToken {
@@ -83,6 +136,10 @@ impl ParserError {
}
}
/// This is just a nice little function to print out what we expected, if
/// we had some expectations. Because English is a little wonky, there's
/// some odd stuff with whether we get 0, 1, 2, or more, and it's nice to
/// just split that bit of logic out.
fn display_expected(expected: &[String]) -> String {
match expected.len() {
0 => "".to_string(),
@@ -96,6 +153,8 @@ fn display_expected(expected: &[String]) -> String {
}
}
/// Given a list of strings, comma separate (with a space) them, as in an
/// English list.
fn comma_separate(strings: &[String]) -> String {
let mut result = String::new();
@@ -125,12 +184,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
let expected_str =
format!("unexpected token {}{}", token, display_expected(expected));
let unexpected_str = format!("unexpected token {}", token);
let mut labels = start.range_label(end);
let labels = start.range_label(end);
Diagnostic::error()
.with_labels(
labels
.drain(..)
.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
@@ -142,12 +201,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
let expected_str =
format!("unexpected token {} after the expected end of file", token);
let unexpected_str = format!("unexpected token {}", token);
let mut labels = start.range_label(end);
let labels = start.range_label(end);
Diagnostic::error()
.with_labels(
labels
.drain(..)
.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
@@ -167,6 +226,14 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
}
impl Program {
/// Parse the given file, adding it to the database as part of the process.
///
/// This operation reads the file from disk and adds it to the database for future
/// reference. If you get an error, we strongly suggest conversion to [`Diagnostic`]
/// and then reporting it to the user via [`codespan_reporting`]. You should use
/// this function if you're pretty sure that you've never seen this file before,
/// and [`Program::parse`] if you have and know its index and already have it in
/// memory.
pub fn parse_file(
file_database: &mut SimpleFiles<String, String>,
file_name: &str,
@@ -177,6 +244,11 @@ impl Program {
Program::parse(file_handle, file_db_info.source())
}
/// Parse a block of text you have in memory, using the given index for [`Location`]s.
///
/// If you use a nonsensical file index, everything will work fine until you try to
/// report an error, at which point [`codespan_reporting`] may have some nasty things
/// to say to you.
pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
let lexer = Token::lexer(buffer)
.spanned()
@@ -188,6 +260,12 @@ impl Program {
}
impl Statement {
/// Parse a statement that you have in memory, using the given index for [`Location`]s.
///
/// As with [`Program::parse`], if you use a bad file index, you'll get weird behaviors
/// when you try to print errors, but things should otherwise work fine. This function
/// will only parse a single statement, which is useful in the REPL, but probably shouldn't
/// be used when reading in whole files.
pub fn parse(file_idx: usize, buffer: &str) -> Result<Statement, ParserError> {
let lexer = Token::lexer(buffer)
.spanned()

View File

@@ -1,12 +1,32 @@
use crate::syntax::Location;
/// The set of valid binary operators.
pub static BINARY_OPERATORS: &[&str] = &["+", "-", "*", "/"];
/// A structure representing a parsed program.
///
/// One `Program` is associated with exactly one input file, and the
/// vector is arranged in exactly the same order as the parsed file.
/// Because this is the syntax layer, the program is guaranteed to be
/// syntactically valid, but may be nonsense. There could be attempts
/// to use unbound variables, for example, until after someone runs
/// `validate` and it comes back without errors.
#[derive(Clone, Debug, PartialEq)]
pub struct Program {
    // The program's statements, in source order.
    pub statements: Vec<Statement>,
}
/// A parsed statement.
///
/// Statements are guaranteed to be syntactically valid, but may be
/// complete nonsense at the semantic level. Which is to say, all the
/// print statements were correctly formatted, and all the variables
/// referenced are definitely valid symbols, but they may not have
/// been defined or anything.
///
/// Note that equivalence testing on statements is independent of
/// source location; it is testing if the two statements say the same
/// thing, not if they are the exact same statement.
#[derive(Clone, Debug)]
pub enum Statement {
Binding(Location, String, Expression),
@@ -28,6 +48,12 @@ impl PartialEq for Statement {
}
}
/// An expression in the underlying syntax.
///
/// Like statements, these expressions are guaranteed to have been
/// formatted correctly, but may not actually make any sense. Also
/// like Statements, the [`PartialEq`] implementation does not take
/// source positions into account.
#[derive(Clone, Debug)]
pub enum Expression {
Value(Location, Value),
@@ -54,7 +80,9 @@ impl PartialEq for Expression {
}
}
/// A literal value from the source syntax.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Value {
    /// The value of the number, and an optional base that it was written in.
    /// NOTE(review): presumably `None` means the default (decimal) base —
    /// confirm against the lexer.
    Number(Option<u8>, i64),
}

View File

@@ -4,11 +4,23 @@ use crate::eval::{EvalEnvironment, EvalError, Value};
use crate::syntax::{Expression, Program, Statement};
impl Program {
/// Evaluate the program, returning either an error or what it prints out when run.
///
/// Doing this evaluation is particularly useful for testing, to ensure that if we
/// modify a program in some way it does the same thing on both sides of the
/// transformation. It's also sometimes just nice to know what a program will be
/// doing.
///
/// Note that the errors here are slightly more strict than what we enforce at runtime.
/// For example, we check for overflow and underflow errors during evaluation, and
/// we don't check for those in the compiled code.
pub fn eval(&self) -> Result<String, EvalError> {
let mut env = EvalEnvironment::empty();
let mut stdout = String::new();
for stmt in self.statements.iter() {
// at this point, evaluation is pretty simple. just walk through each
// statement, in order, and record printouts as we come to them.
match stmt {
Statement::Binding(_, name, value) => {
let actual_value = value.eval(&env)?;
@@ -40,6 +52,7 @@ impl Expression {
let mut arg_values = Vec::with_capacity(args.len());
for arg in args.iter() {
// yay, recursion! makes this pretty straightforward
arg_values.push(arg.eval(env)?);
}

View File

@@ -1,5 +1,9 @@
use codespan_reporting::diagnostic::{Diagnostic, Label};
/// A source location, for use in pointing users towards warnings and errors.
///
/// Internally, locations are very tied to the `codespan_reporting` library,
/// and the primary use of them is to serve as anchors within that library.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Location {
file_idx: usize,
@@ -7,10 +11,22 @@ pub struct Location {
}
impl Location {
    /// Generate a new `Location` from a file index and an offset from the
    /// start of the file.
    ///
    /// The file index is based on the file database being used. See the
    /// `codespan_reporting::files::SimpleFiles::add` function, which is
    /// normally where we get this index. The offset is measured from the
    /// beginning of that file, as produced by the lexer.
    pub fn new(file_idx: usize, offset: usize) -> Self {
        Location { file_idx, offset }
    }
/// Generate a `Location` for a completely manufactured bit of code.
///
/// Ideally, this is used only in testing, as any code we generate as
/// part of the compiler should, theoretically, be tied to some actual
/// location in the source code. That being said, this can be used in
/// a pinch ... just maybe try to avoid it if you can.
pub fn manufactured() -> Self {
Location {
file_idx: 0,
@@ -18,27 +34,73 @@ impl Location {
}
}
/// Generate a primary label for a [`Diagnostic`], based on this source
/// location.
///
/// Note, this is just the [`Label`], you'll want to fill in the [`Diagnostic`]
/// with a lot more information.
///
/// Primary labels are the things that are the key cause of the message.
/// If, for example, it was an error to bind a variable named "x", and
/// then have another binding of a variable named "x", the second one
/// would likely be the primary label (because that's where the error
/// actually happened), but you'd probably want to make the first location
/// the secondary label to help users find it.
pub fn primary_label(&self) -> Label<usize> {
Label::primary(self.file_idx, self.offset..self.offset)
}
/// Generate a secondary label for a [`Diagnostic`], based on this source
/// location.
///
/// Note, this is just the [`Label`], you'll want to fill in the [`Diagnostic`]
/// with a lot more information.
///
/// Secondary labels are the things that are involved in the message, but
/// aren't necessarily a problem in and of themselves. If, for example, it
/// was an error to bind a variable named "x", and then have another binding
/// of a variable named "x", the second one would likely be the primary
/// label (because that's where the error actually happened), but you'd
/// probably want to make the first location the secondary label to help
/// users find it.
pub fn secondary_label(&self) -> Label<usize> {
Label::secondary(self.file_idx, self.offset..self.offset)
}
pub fn range_label(&self, end: &Location) -> Vec<Label<usize>> {
if self.file_idx == end.file_idx {
vec![Label::primary(self.file_idx, self.offset..end.offset)]
} else if self.file_idx == 0 {
// if this is a manufactured item, then ... just try the other one
vec![Label::primary(end.file_idx, end.offset..end.offset)]
/// Given this location and another, generate a primary label that
/// specifies the area between those two locations.
///
/// See [`Self::primary_label`] for some discussion of primary versus
/// secondary labels. If the two locations are the same, this method does
/// the exact same thing as [`Self::primary_label`]. If this item was
/// generated by [`Self::manufactured`], it will act as if you'd called
/// `primary_label` on the argument. Otherwise, it will generate the obvious
/// span.
///
/// This function will return `None` only in the case that you provide
/// labels from two different files, which it cannot sensibly handle.
pub fn range_label(&self, end: &Location) -> Option<Label<usize>> {
if self.file_idx == 0 {
return Some(end.primary_label());
}
if self.file_idx != end.file_idx {
return None;
}
if self.offset > end.offset {
Some(Label::primary(self.file_idx, end.offset..self.offset))
} else {
// we'll just pick the first location if this is in two different
// files
vec![Label::primary(self.file_idx, self.offset..self.offset)]
Some(Label::primary(self.file_idx, self.offset..end.offset))
}
}
/// Return an error diagnostic centered at this location.
///
/// Note that this [`Diagnostic`] will have no information associated with
/// it other than that (a) there is an error, and (b) that the error is at
/// this particular location. You'll need to extend it with actually useful
/// information, like what kind of error it is.
pub fn error(&self) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx,
@@ -46,6 +108,12 @@ impl Location {
)])
}
/// Return an error diagnostic centered at this location, with the given message.
///
/// This is much more useful than [`Self::error`], because it actually provides
/// the user with some guidance. That being said, you still might want to add
/// even more information to it, using [`Diagnostic::with_labels`],
/// [`Diagnostic::with_notes`], or [`Diagnostic::with_code`].
pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx,

View File

@@ -1,14 +1,32 @@
//! The parser for NGR!
//!
//! This file contains the grammar for the NGR language; a grammar is a nice,
//! machine-readable way to describe how your language's syntax works. For
//! example, here we describe a program as a series of statements, statements
//! as either variable binding or print statements, etc. As the grammar gets
//! more complicated, using tools like [`lalrpop`] becomes even more important.
//! (Although, at some point, things can become so complicated that you might
//! eventually want to leave lalrpop behind.)
//!
use crate::syntax::{LexerError, Location};
use crate::syntax::ast::{Program,Statement,Expression,Value};
use crate::syntax::tokens::Token;
use internment::ArcIntern;
// one cool thing about lalrpop: we can pass arguments. in this case, the
// file index of the file we're parsing. we combine this with the file offset
// that Logos gives us to make a [`crate::syntax::Location`].
grammar(file_idx: usize);
// this is a slightly odd way to describe this, but: consider this section
// as describing the stuff that is external to the lalrpop grammar that it
// needs to know to do its job.
extern {
type Location = usize;
type Location = usize; // Logos, our lexer, implements locations as
// offsets from the start of the file.
type Error = LexerError;
// here we redeclare all of the tokens.
enum Token {
"=" => Token::Equals,
";" => Token::Semi,
@@ -22,57 +40,123 @@ extern {
"*" => Token::Operator('*'),
"/" => Token::Operator('/'),
// the previous items just match their tokens, and if you try
// to name and use "their value", you get their source location.
// For these, we want "their value" to be their actual contents,
// which is why we put their types in angle brackets.
"<num>" => Token::Number((<Option<u8>>,<i64>)),
"<var>" => Token::Variable(<ArcIntern<String>>),
}
}
pub Program: Program = {
// a program is just a set of statements
<stmts:Statements> => Program {
statements: stmts
}
}
Statements: Vec<Statement> = {
// a statement is either a set of statements followed by another
// statement (note, here, that you can name the result of a sub-parse
// using <name: subrule>) ...
<mut stmts:Statements> <stmt:Statement> => {
stmts.push(stmt);
stmts
},
// ... or it's nothing. This may feel like an awkward way to define
// lists of things -- and it is a bit awkward -- but there are actual
// technical reasons that you want to (a) use recursion to define
// these, and (b) use *left* recursion, specifically. That's why, in
// this file, all of the recursive cases are to the left, like they
// are above.
//
// the details of why left recursion is better is actually pretty
// fiddly and in the weeds, and if you're interested you should look
// up LALR parsers versus LL parsers; both their differences and how
// they're constructed, as they're kind of neat.
//
// but if you're just writing grammars with lalrpop, then you should
// just remember that you should always use left recursion, and be
// done with it.
=> {
Vec::new()
}
}
pub Statement: Statement = {
// A statement can be a variable binding. Note, here, that we use this
// funny @L thing to get the source location before the variable, so that
// we can say that this statement spans across everything.
<l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e),
// Alternatively, a statement can just be a print statement.
"print" <l:@L> <v:"<var>"> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()),
}
// Expressions! Expressions are a little fiddly, because we're going to
// use a little bit of a trick to make sure that we get operator precedence
// right. The trick works by creating a top-level `Expression` grammar entry
// that just points to the thing with the *weakest* precedence. In this case,
// we have addition, subtraction, multiplication, and division, so addition
// and subtraction have the weakest precedence.
//
// Then, as we go down the precedence tree, each item will recurse (left!)
// to other items at the same precedence level. The right hand operator, for
// binary operators (which is all of ours, at the moment) will then be one
// level stronger precedence. In addition, we'll let people just fall through
// to the next level; so if there isn't an addition or subtraction, we'll just
// fall through to the multiplication/division case.
//
// Finally, at the bottom, we'll have the core expressions (like constants,
// variables, etc.) as well as a parenthesized version of `Expression`, which
// gets us right up top again.
//
// Understanding why this works to solve all your operator precedence problems
// is a little hard to give an easy intuition for, but for myself it helped
// to run through a few examples. Consider thinking about how you want to
// parse something like "1 + 2 * 3", for example, versus "1 + 2 + 3" or
// "1 * 2 + 3", and hopefully that'll help.
Expression: Expression = {
AdditiveExpression,
}
// we group addition and subtraction under the heading "additive"
AdditiveExpression: Expression = {
<e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]),
<e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]),
MultiplicativeExpression,
}
// similarly, we group multiplication and division under "multiplicative"
MultiplicativeExpression: Expression = {
<e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]),
<e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]),
AtomicExpression,
}
// finally, we describe our lowest-level expressions as "atomic", because
// they cannot be further divided into parts
AtomicExpression: Expression = {
// just a variable reference
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
// just a number
<l:@L> <n:"<num>"> => {
let val = Value::Number(n.0, n.1);
Expression::Value(Location::new(file_idx, l), val)
},
// a tricky case: also just a number, but using a negative sign. an
// alternative way to do this -- and we may do this eventually -- is
// to implement a unary negation expression. this has the odd effect
// that the user never actually writes down a negative number; they just
// write positive numbers which are immediately sent to a negation
// primitive!
<l:@L> "-" <n:"<num>"> => {
let val = Value::Number(n.0, -n.1);
Expression::Value(Location::new(file_idx, l), val)
},
// finally, let people parenthesize expressions and get back to a
// lower precedence
"(" <e:Expression> ")" => e,
}

View File

@@ -1,63 +0,0 @@
use crate::syntax::ast::{Expression, Program, Statement};
impl Program {
    /// Flatten every statement in the program into simple bindings.
    ///
    /// Compound expressions are broken apart so that each binding's
    /// right-hand side is at most one primitive application deep; the
    /// intermediate results are bound to freshly generated names.
    pub fn simplify(mut self) -> Self {
        // the counter that keeps generated names unique across the whole
        // program; it is threaded through every statement we rewrite.
        let mut gensym_index = 1;
        let originals = std::mem::take(&mut self.statements);
        self.statements = originals
            .into_iter()
            .flat_map(|stmt| stmt.simplify(&mut gensym_index))
            .collect();
        self
    }
}
impl Statement {
    /// Rewrite this statement into a sequence of equivalent "simple"
    /// statements, in which no binding's right-hand side nests
    /// expressions inside expressions.
    ///
    /// `gensym_index` is the shared counter used to generate fresh names
    /// for intermediate bindings; it advances as names are consumed.
    pub fn simplify(self, gensym_index: &mut usize) -> Vec<Statement> {
        match self {
            // the only interesting case: a binding whose right-hand side is
            // a primitive application, which may nest further expressions
            // that need to be pulled out into their own bindings first.
            Statement::Binding(loc, name, value @ Expression::Primitive(_, _, _)) => {
                let (mut result, simplified) = value.rebind(&name, gensym_index);
                result.push(Statement::Binding(loc, name, simplified));
                result
            }
            // prints, and bindings of plain values or references, are
            // already as simple as they can get.
            simple => vec![simple],
        }
    }
}
impl Expression {
    /// Pull any nested sub-expressions of a primitive application out into
    /// their own bindings, returning those prerequisite statements together
    /// with a trivially-simple replacement expression.
    ///
    /// Values and references are returned untouched (with no prerequisites);
    /// a primitive application is itself bound to a freshly generated name
    /// derived from `base_name`, and a reference to that name is returned in
    /// its place.
    fn rebind(self, base_name: &str, gensym_index: &mut usize) -> (Vec<Statement>, Expression) {
        // already-simple expressions need no rebinding at all
        let (loc, prim, args) = match self {
            Expression::Value(_, _) | Expression::Reference(_, _) => return (Vec::new(), self),
            Expression::Primitive(loc, prim, args) => (loc, prim, args),
        };
        // recursively simplify every argument, collecting the statements
        // each one needs to have run first
        let mut prereqs = Vec::new();
        let mut simple_args = Vec::with_capacity(args.len());
        for arg in args {
            let (mut arg_prereqs, simple) = arg.rebind(base_name, gensym_index);
            prereqs.append(&mut arg_prereqs);
            simple_args.push(simple);
        }
        // bind the (now flat) primitive application to a fresh name ...
        let fresh_name = format!("<{}:{}>", base_name, *gensym_index);
        *gensym_index += 1;
        prereqs.push(Statement::Binding(
            loc.clone(),
            fresh_name.clone(),
            Expression::Primitive(loc.clone(), prim, simple_args),
        ));
        // ... and stand a reference to that name in for the original
        (prereqs, Expression::Reference(loc, fresh_name))
    }
}

View File

@@ -4,8 +4,30 @@ use std::fmt;
use std::num::ParseIntError;
use thiserror::Error;
/// A single token of the input stream; used to help the parsing go down
/// more easily.
///
/// The key way to generate this structure is via the [`Logos`] trait.
/// See the [`logos`] documentation for more information; we use the
/// [`Token::lexer`] function internally.
///
/// The first step in the compilation process is turning the raw string
/// data (in UTF-8, which is its own joy) into a sequence of more sensible
/// tokens. Here, for example, we turn "x=5" into three tokens: a
/// [`Token::Variable`] for "x", a [`Token::Equals`] for the "=", and
/// then a [`Token::Number`] for the "5". Later on, we'll worry about
/// making sense of those three tokens.
///
/// For now, our list of tokens is relatively straightforward. We'll
/// need/want to extend these later.
///
/// The [`std::fmt::Display`] implementation for [`Token`] should
/// round-trip; if you lex a string generated with the [`std::fmt::Display`]
/// trait, you should get back the exact same token.
#[derive(Logos, Clone, Debug, PartialEq, Eq)]
pub enum Token {
// Our first set of tokens are simple characters that we're
// going to use to structure NGR programs.
#[token("=")]
Equals,
@@ -18,12 +40,20 @@ pub enum Token {
#[token(")")]
RightParen,
// Next we take care of any reserved words; I always like to put
// these before we start recognizing more complicated regular
// expressions. I don't think it matters, but it works for me.
#[token("print")]
Print,
// Next are the operators for NGR. We only have 4, now, but
// we might extend these later, or even make them user-definable!
#[regex(r"[+\-*/]", |v| v.slice().chars().next())]
Operator(char),
/// Numbers capture both the value we read from the input,
/// converted to an `i64`, as well as the base the user used
/// to write the number, if they did so.
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
@@ -31,12 +61,23 @@ pub enum Token {
#[regex(r"[0-9]+", |v| parse_number(None, v))]
Number((Option<u8>, i64)),
// Variables; this is a very standard, simple set of characters
// for variables, but feel free to experiment with more complicated
// things. I chose to force variables to start with a lower case
// letter, too.
#[regex(r"[a-z][a-zA-Z0-9_]*", |v| ArcIntern::new(v.slice().to_string()))]
Variable(ArcIntern<String>),
// the next token will be an error token
#[error]
// we're actually just going to skip whitespace, though
#[regex(r"[ \t\r\n\f]+", logos::skip)]
// this is an extremely simple version of comments, just line
// comments. More complicated /* */ comments can be harder to
// implement, and didn't seem worth it at the time.
#[regex(r"//.*", logos::skip)]
/// This token represents that some core error happened in lexing;
/// possibly that something didn't match anything at all.
Error,
}
@@ -63,19 +104,28 @@ impl fmt::Display for Token {
}
}
/// A sudden and unexpected error in the lexer.
#[derive(Debug, Error, PartialEq, Eq)]
pub enum LexerError {
/// The `usize` here is the offset that we ran into the problem, given
/// from the start of the file.
#[error("Failed lexing at {0}")]
LexFailure(usize),
}
#[cfg(test)]
impl Token {
/// Create a variable token with the given name. Very handy for
/// testing.
pub(crate) fn var(s: &str) -> Token {
Token::Variable(ArcIntern::new(s.to_string()))
}
}
/// Parse a number in the given base, return a pair of the base and the
/// parsed number. This is just a helper used for all of the number
/// regular expression cases, which kicks off to the obvious Rust
/// standard library function.
fn parse_number(
base: Option<u8>,
value: &Lexer<Token>,

View File

@@ -2,6 +2,13 @@ use crate::syntax::{Expression, Location, Program, Statement};
use codespan_reporting::diagnostic::Diagnostic;
use std::collections::HashMap;
/// An error we found while validating the input program.
///
/// These errors indicate that we should stop trying to compile
/// the program, because it's just fundamentally broken in a way
/// that we're not going to be able to work through. As with most
/// of these errors, we recommend converting this to a [`Diagnostic`]
/// and using [`codespan_reporting`] to present them to the user.
pub enum Error {
UnboundVariable(Location, String),
}
@@ -16,6 +23,13 @@ impl From<Error> for Diagnostic<usize> {
}
}
/// A problem we found validating the input that isn't critical.
///
/// These are things that the user might want to do something about,
/// but we can keep going without it being a problem. As with most of
/// these things, if you want to present this information to the user,
/// the best way to do so is via [`From`] and [`Diagnostic`], and then
/// interactions via [`codespan_reporting`].
#[derive(Debug, PartialEq, Eq)]
pub enum Warning {
ShadowedVariable(Location, Location, String),
@@ -37,6 +51,11 @@ impl From<Warning> for Diagnostic<usize> {
}
impl Program {
/// Validate that the program makes semantic sense, not just syntactic sense.
///
/// This checks for things like references to variables that don't exist, for
/// example, and generates warnings for things that are inadvisable but not
/// actually a problem.
pub fn validate(&self) -> (Vec<Error>, Vec<Warning>) {
let mut errors = vec![];
let mut warnings = vec![];
@@ -53,6 +72,15 @@ impl Program {
}
impl Statement {
/// Validate that the statement makes semantic sense, not just syntactic sense.
///
/// This checks for things like references to variables that don't exist, for
/// example, and generates warnings for things that are inadvisable but not
/// actually a problem. Since statements appear in a broader context, you'll
/// need to provide the set of variables that are bound where this statement
/// occurs. We use a `HashMap` to map these bound variables to the locations
/// where they're bound, because these locations are handy when generating errors
/// and warnings.
pub fn validate(
&self,
bound_variables: &mut HashMap<String, Location>,