From 309983ef3e9b523a2ae2a0b05551b2894839bb61 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Tue, 9 May 2023 21:51:57 -0700 Subject: [PATCH] Clean ups, comments on from_syntax. --- src/ir/from_syntax.rs | 95 +++++++++++++++++++++++++++++++++++-------- src/syntax.rs | 3 -- 2 files changed, 78 insertions(+), 20 deletions(-) diff --git a/src/ir/from_syntax.rs b/src/ir/from_syntax.rs index fbd35a6..46c7c69 100644 --- a/src/ir/from_syntax.rs +++ b/src/ir/from_syntax.rs @@ -7,6 +7,12 @@ use crate::syntax; use super::ValueOrRef; impl From for ir::Program { + /// We implement the top-level conversion of a syntax::Program into an + /// ir::Program using just the standard `From::from`, because we don't + /// need to return any arguments and we shouldn't produce any errors. + /// Technically there's an `unwrap` deep under the hood that we could + /// float out, but the validator really should've made sure that never + /// happens, so we're just going to assume. fn from(mut value: syntax::Program) -> Self { let mut statements = Vec::new(); @@ -19,6 +25,15 @@ impl From for ir::Program { } impl From for ir::Program { + /// One interesting thing about this conversion is that there isn't + /// a natural translation from syntax::Statement to ir::Statement, + /// because the syntax version can have nested expressions and the + /// IR version can't. + /// + /// As a result, we can naturally convert a syntax::Statement into + /// an ir::Program, because we can allow the additional binding + /// sites to be generated, instead. And, bonus, it turns out that + /// this is what we wanted anyways. fn from(value: syntax::Statement) -> Self { ir::Program { statements: value.simplify(), @@ -27,17 +42,45 @@ impl From for ir::Program { } impl syntax::Statement { + /// Simplify a syntax::Statement into a series of ir::Statements. + /// + /// The reason this function is one-to-many is because we may have to + /// introduce new binding sites in order to avoid having nested + /// expressions. 
Nested expressions, like `(1 + 2) * 3`, are allowed + /// in syntax::Expression but are expressly *not* allowed in + /// ir::Expression. So this pass converts them into bindings, like + /// this: + /// + /// x = (1 + 2) * 3; + /// + /// ==> + /// + /// x:1 = 1 + 2; + /// x:2 = x:1 * 3; + /// x = x:2 + /// + /// Thus ensuring that things are nice and simple. Note that the + /// binding of `x:2` is not, strictly speaking, necessary, but it + /// makes the code below much easier to read. fn simplify(self) -> Vec { let mut new_statements = vec![]; match self { + // Print statements we don't have to do much with syntax::Statement::Print(loc, name) => { new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name))) } + + // Bindings, however, may involve a single expression turning into + // a series of statements and then an expression. syntax::Statement::Binding(loc, name, value) => { - let (mut prereqs, new_value) = value.simplify(&name); + let (mut prereqs, new_value) = value.rebind(&name); new_statements.append(&mut prereqs); - new_statements.push(ir::Statement::Binding(loc, ArcIntern::new(name), new_value)) + new_statements.push(ir::Statement::Binding( + loc, + ArcIntern::new(name), + new_value.into(), + )) } } @@ -46,43 +89,55 @@ impl syntax::Statement { } impl syntax::Expression { - fn simplify(self, base_name: &str) -> (Vec, ir::Expression) { - match self { - syntax::Expression::Value(loc, val) => (vec![], ir::Expression::Value(loc, val.into())), - syntax::Expression::Reference(loc, name) => { - (vec![], ir::Expression::Reference(loc, ArcIntern::new(name))) - } - syntax::Expression::Primitive(_, _, _) => { - let (prereqs, val_or_ref) = self.rebind(base_name); - (prereqs, val_or_ref.into()) - } - } - } - + /// This actually does the meat of the simplification work, here, by rebinding + /// any nested expressions into their own variables. 
We have this return + /// `ValueOrRef` in all cases because it makes for slightly less code; in the + /// case when we actually want an `Expression`, we can just use `into()`. fn rebind(self, base_name: &str) -> (Vec, ir::ValueOrRef) { match self { + // Values just convert in the obvious way, and require no prereqs syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())), + + // Similarly, references just convert in the obvious way, and require + // no prereqs syntax::Expression::Reference(loc, name) => { (vec![], ValueOrRef::Ref(loc, ArcIntern::new(name))) } + + // Primitive expressions are where we do the real work. syntax::Expression::Primitive(loc, prim, mut expressions) => { + // generate a fresh new name for the binding site we're going to + // introduce, basing the name on wherever we came from; so if this + // expression was bound to `x` originally, it might become `x:23`. + // + // gensym is guaranteed to give us a name that is unused anywhere + // else in the program. let new_name = gensym(base_name); let mut prereqs = Vec::new(); let mut new_exprs = Vec::new(); + // here we loop through every argument, and recurse on the expressions + // we find. that will give us any new binding sites that *they* introduce, + // and a simple value or reference that we can use in our result. for expr in expressions.drain(..) { - let (mut cur_prereqs, arg) = expr.rebind(base_name); + let (mut cur_prereqs, arg) = expr.rebind(new_name.as_str()); prereqs.append(&mut cur_prereqs); new_exprs.push(arg); } - let prim = ir::Primitive::try_from(prim.as_str()).unwrap(); + // now we're going to use those new arguments to run the primitive, binding + // the results to the new variable we introduced. 
+ let prim = + ir::Primitive::try_from(prim.as_str()).expect("is valid primitive function"); prereqs.push(ir::Statement::Binding( loc.clone(), new_name.clone(), ir::Expression::Primitive(loc.clone(), prim, new_exprs), )); + // and finally, we can return all the new bindings, and a reference to + // the variable we just introduced to hold the value of the primitive + // invocation. (prereqs, ValueOrRef::Ref(loc, new_name)) } } @@ -103,6 +158,12 @@ impl From for ir::Primitive { } } +/// Generate a fresh new name based on the given name. +/// +/// The new name is guaranteed to be unique across the entirety of the +/// execution. This is achieved by using characters in the variable name +/// that would not be valid input, and by including a counter that is +/// incremented on every invocation. fn gensym(name: &str) -> ArcIntern { static COUNTER: AtomicUsize = AtomicUsize::new(0); diff --git a/src/syntax.rs b/src/syntax.rs index 23eef81..0fdc2e2 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -12,9 +12,6 @@ //! by [`lalrpop`](https://lalrpop.github.io/lalrpop/)). //! * Validating the tree we have parsed, using the [`validate`] module, //! returning any warnings or errors we have found. -//! * Simplifying the tree we have parsed, using the [`simplify`] module, -//! into something that's more easily turned into our [compiler internal -//! representation](super::ir). //! //! In addition to all of this, we make sure that the structures defined in this //! module are all: