📜 Add better documentation across the compiler. (#3)

These changes pay particular attention to API endpoints, to try to ensure that any rustdocs generated are detailed and sensible. A good next step, eventually, might be to include doctest examples, as well. For the moment, it's not clear that they would provide a lot of value, though. In addition, this makes a couple of refactors that simplify the code base in ways that make things clearer or, at least, briefer.
2023-05-13 14:34:48 -05:00
parent f4594bf2cc
commit 1fbfd0c2d2
28 changed files with 1550 additions and 432 deletions
--- a/src/ir/from_syntax.rs
+++ b/src/ir/from_syntax.rs
@@ -1,82 +1,185 @@
 use internment::ArcIntern;
+use std::sync::atomic::AtomicUsize;

 use crate::ir::ast as ir;
-use crate::syntax::ast as syntax;
+use crate::syntax;
+
+use super::ValueOrRef;

 impl From<syntax::Program> for ir::Program {
+    /// We implement the top-level conversion of a syntax::Program into an
+    /// ir::Program using just the standard `From::from`, because we don't
+    /// need to return any arguments and we shouldn't produce any errors.
+    /// Technically there's an `unwrap` deep under the hood that we could
+    /// float out, but the validator really should've made sure that never
+    /// happens, so we're just going to assume it never does.
    fn from(mut value: syntax::Program) -> Self {
-        ir::Program {
-            statements: value.statements.drain(..).map(Into::into).collect(),
+        let mut statements = Vec::new();
+
+        for stmt in value.statements.drain(..) {
+            statements.append(&mut stmt.simplify());
        }
+
+        ir::Program { statements }
    }
 }

-impl From<Vec<syntax::Statement>> for ir::Program {
-    fn from(mut value: Vec<syntax::Statement>) -> Self {
-        ir::Program {
-            statements: value.drain(..).map(Into::into).collect(),
-        }
-    }
-}
-
-impl From<syntax::Statement> for ir::Statement {
+impl From<syntax::Statement> for ir::Program {
+    /// One interesting thing about this conversion is that there isn't
+    /// a natural translation from syntax::Statement to ir::Statement,
+    /// because the syntax version can have nested expressions and the
+    /// IR version can't.
+    ///
+    /// As a result, we can naturally convert a syntax::Statement into
+    /// an ir::Program, because we can allow the additional binding
+    /// sites to be generated, instead. And, bonus, it turns out that
+    /// this is what we wanted anyways.
    fn from(value: syntax::Statement) -> Self {
-        match value {
-            syntax::Statement::Binding(loc, name, expr) => {
-                ir::Statement::Binding(loc, ArcIntern::from(name), ir::Expression::from(expr))
-            }
-            syntax::Statement::Print(loc, name) => ir::Statement::Print(loc, ArcIntern::from(name)),
+        ir::Program {
+            statements: value.simplify(),
        }
    }
 }

-impl From<syntax::Expression> for ir::Expression {
-    fn from(value: syntax::Expression) -> Self {
-        match value {
-            syntax::Expression::Primitive(loc, name, mut exprs) => ir::Expression::Primitive(
-                loc,
-                ir::Primitive::try_from(name.as_str()).unwrap(),
-                exprs.drain(..).map(Into::into).collect(),
-            ),
+impl syntax::Statement {
+    /// Simplify a syntax::Statement into a series of ir::Statements.
+    ///
+    /// The reason this function is one-to-many is because we may have to
+    /// introduce new binding sites in order to avoid having nested
+    /// expressions. Nested expressions, like `(1 + 2) * 3`, are allowed
+    /// in syntax::Expression but are expressly *not* allowed in
+    /// ir::Expression. So this pass converts them into bindings, like
+    /// this:
+    ///
+    ///   x = (1 + 2) * 3;
+    ///
+    ///  ==>
+    ///
+    ///   x:1 = 1 + 2;
+    ///   x:2 = x:1 * 3;
+    ///   x = x:2
+    ///
+    /// Thus ensuring that things are nice and simple. Note that the
+    /// binding of `x:2` is not, strictly speaking, necessary, but it
+    /// makes the code below much easier to read.
+    fn simplify(self) -> Vec<ir::Statement> {
+        let mut new_statements = vec![];
+
+        match self {
+            // Print statements we don't have to do much with
+            syntax::Statement::Print(loc, name) => {
+                new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name)))
+            }
+
+            // Bindings, however, may involve a single expression turning into
+            // a series of statements and then an expression.
+            syntax::Statement::Binding(loc, name, value) => {
+                let (mut prereqs, new_value) = value.rebind(&name);
+                new_statements.append(&mut prereqs);
+                new_statements.push(ir::Statement::Binding(
+                    loc,
+                    ArcIntern::new(name),
+                    new_value.into(),
+                ))
+            }
+        }
+
+        new_statements
+    }
+}
+
+impl syntax::Expression {
+    /// This actually does the meat of the simplification work, here, by rebinding
+    /// any nested expressions into their own variables. We have this return
+    /// `ValueOrRef` in all cases because it makes for slightly less code; in the
+    /// case when we actually want an `Expression`, we can just use `into()`.
+    fn rebind(self, base_name: &str) -> (Vec<ir::Statement>, ir::ValueOrRef) {
+        match self {
+            // Values just convert in the obvious way, and require no prereqs
+            syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())),
+
+            // Similarly, references just convert in the obvious way, and require
+            // no prereqs
            syntax::Expression::Reference(loc, name) => {
-                ir::Expression::Reference(loc, ArcIntern::from(name))
-            }
-            syntax::Expression::Value(loc, value) => {
-                ir::Expression::Value(loc, ir::Value::from(value))
-            }
-        }
-    }
-}
-
-impl From<syntax::Expression> for ir::ValueOrRef {
-    fn from(value: syntax::Expression) -> Self {
-        match value {
-            syntax::Expression::Primitive(loc, _, _) => {
-                panic!("{:?}: couldn't convert to valueorref", loc)
+                (vec![], ValueOrRef::Ref(loc, ArcIntern::new(name)))
            }

-            syntax::Expression::Reference(loc, var) => {
-                ir::ValueOrRef::Ref(loc, ArcIntern::new(var))
-            }
+            // Primitive expressions are where we do the real work.
+            syntax::Expression::Primitive(loc, prim, mut expressions) => {
+                // generate a fresh new name for the binding site we're going to
+                // introduce, basing the name on wherever we came from; so if this
+                // expression was bound to `x` originally, it might become `<x:23>`.
+                //
+                // gensym is guaranteed to give us a name that is unused anywhere
+                // else in the program.
+                let new_name = gensym(base_name);
+                let mut prereqs = Vec::new();
+                let mut new_exprs = Vec::new();

-            syntax::Expression::Value(loc, val) => ir::ValueOrRef::Value(loc, val.into()),
+                // here we loop through every argument, and recurse on the expressions
+                // we find. that will give us any new binding sites that *they* introduce,
+                // and a simple value or reference that we can use in our result.
+                for expr in expressions.drain(..) {
+                    let (mut cur_prereqs, arg) = expr.rebind(new_name.as_str());
+                    prereqs.append(&mut cur_prereqs);
+                    new_exprs.push(arg);
+                }
+
+                // now we're going to use those new arguments to run the primitive, binding
+                // the results to the new variable we introduced.
+                let prim =
+                    ir::Primitive::try_from(prim.as_str()).expect("is valid primitive function");
+                prereqs.push(ir::Statement::Binding(
+                    loc.clone(),
+                    new_name.clone(),
+                    ir::Expression::Primitive(loc.clone(), prim, new_exprs),
+                ));
+
+                // and finally, we can return all the new bindings, and a reference to
+                // the variable we just introduced to hold the value of the primitive
+                // invocation.
+                (prereqs, ValueOrRef::Ref(loc, new_name))
+            }
        }
    }
 }

 impl From<syntax::Value> for ir::Value {
-    fn from(x: syntax::Value) -> Self {
-        match x {
-            syntax::Value::Number(base, value) => ir::Value::Number(base, value),
+    fn from(value: syntax::Value) -> Self {
+        match value {
+            syntax::Value::Number(base, val) => ir::Value::Number(base, val),
        }
    }
 }

+impl From<String> for ir::Primitive {
+    fn from(value: String) -> Self {
+        value.try_into().unwrap()
+    }
+}
+
+/// Generate a fresh new name based on the given name.
+///
+/// The new name is guaranteed to be unique across the entirety of the
+/// execution. This is achieved by using characters in the variable name
+/// that would not be valid input, and by including a counter that is
+/// incremented on every invocation.
+fn gensym(name: &str) -> ArcIntern<String> {
+    static COUNTER: AtomicUsize = AtomicUsize::new(0);
+
+    let new_name = format!(
+        "<{}:{}>",
+        name,
+        COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
+    );
+    ArcIntern::new(new_name)
+}
+
 proptest::proptest! {
    #[test]
    fn translation_maintains_semantics(input: syntax::Program) {
        let syntax_result = input.eval();
-        let ir = ir::Program::from(input.simplify());
+        let ir = ir::Program::from(input);
        let ir_result = ir.eval();
        assert_eq!(syntax_result, ir_result);
    }