use internment::ArcIntern; use std::sync::atomic::AtomicUsize; use crate::ir::ast as ir; use crate::syntax; use super::ValueOrRef; impl From for ir::Program { /// We implement the top-level conversion of a syntax::Program into an /// ir::Program using just the standard `From::from`, because we don't /// need to return any arguments and we shouldn't produce any errors. /// Technically there's an `unwrap` deep under the hood that we could /// float out, but the validator really should've made sure that never /// happens, so we're just going to assume. fn from(mut value: syntax::Program) -> Self { let mut statements = Vec::new(); for stmt in value.statements.drain(..) { statements.append(&mut stmt.simplify()); } ir::Program { statements } } } impl From for ir::Program { /// One interesting thing about this conversion is that there isn't /// a natural translation from syntax::Statement to ir::Statement, /// because the syntax version can have nested expressions and the /// IR version can't. /// /// As a result, we can naturally convert a syntax::Statement into /// an ir::Program, because we can allow the additional binding /// sites to be generated, instead. And, bonus, it turns out that /// this is what we wanted anyways. fn from(value: syntax::Statement) -> Self { ir::Program { statements: value.simplify(), } } } impl syntax::Statement { /// Simplify a syntax::Statement into a series of ir::Statements. /// /// The reason this function is one-to-many is because we may have to /// introduce new binding sites in order to avoid having nested /// expressions. Nested expressions, like `(1 + 2) * 3`, are allowed /// in syntax::Expression but are expressly *not* allowed in /// ir::Expression. So this pass converts them into bindings, like /// this: /// /// x = (1 + 2) * 3; /// /// ==> /// /// x:1 = 1 + 2; /// x:2 = x:1 * 3; /// x = x:2 /// /// Thus ensuring that things are nice and simple. Note that the /// binding of `x:2` is not, strictly speaking, necessary, but it /// makes the code below much easier to read. fn simplify(self) -> Vec { let mut new_statements = vec![]; match self { // Print statements we don't have to do much with syntax::Statement::Print(loc, name) => { new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name))) } // Bindings, however, may involve a single expression turning into // a series of statements and then an expression. syntax::Statement::Binding(loc, name, value) => { let (mut prereqs, new_value) = value.rebind(&name); new_statements.append(&mut prereqs); new_statements.push(ir::Statement::Binding( loc, ArcIntern::new(name), new_value.into(), )) } } new_statements } } impl syntax::Expression { /// This actually does the meat of the simplification work, here, by rebinding /// any nested expressions into their own variables. We have this return /// `ValueOrRef` in all cases because it makes for slighly less code; in the /// case when we actually want an `Expression`, we can just use `into()`. fn rebind(self, base_name: &str) -> (Vec, ir::ValueOrRef) { match self { // Values just convert in the obvious way, and require no prereqs syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())), // Similarly, references just convert in the obvious way, and require // no prereqs syntax::Expression::Reference(loc, name) => { (vec![], ValueOrRef::Ref(loc, ArcIntern::new(name))) } syntax::Expression::Cast(_, _, _) => unimplemented!(), // Primitive expressions are where we do the real work. syntax::Expression::Primitive(loc, prim, mut expressions) => { // generate a fresh new name for the binding site we're going to // introduce, basing the name on wherever we came from; so if this // expression was bound to `x` originally, it might become `x:23`. // // gensym is guaranteed to give us a name that is unused anywhere // else in the program. let new_name = gensym(base_name); let mut prereqs = Vec::new(); let mut new_exprs = Vec::new(); // here we loop through every argument, and recurse on the expressions // we find. that will give us any new binding sites that *they* introduce, // and a simple value or reference that we can use in our result. for expr in expressions.drain(..) { let (mut cur_prereqs, arg) = expr.rebind(new_name.as_str()); prereqs.append(&mut cur_prereqs); new_exprs.push(arg); } // now we're going to use those new arguments to run the primitive, binding // the results to the new variable we introduced. let prim = ir::Primitive::try_from(prim.as_str()).expect("is valid primitive function"); prereqs.push(ir::Statement::Binding( loc.clone(), new_name.clone(), ir::Expression::Primitive(loc.clone(), prim, new_exprs), )); // and finally, we can return all the new bindings, and a reference to // the variable we just introduced to hold the value of the primitive // invocation. (prereqs, ValueOrRef::Ref(loc, new_name)) } } } } impl From for ir::Value { fn from(value: syntax::Value) -> Self { match value { syntax::Value::Number(base, val) => ir::Value::Number(base, val), } } } impl From for ir::Primitive { fn from(value: String) -> Self { value.try_into().unwrap() } } /// Generate a fresh new name based on the given name. /// /// The new name is guaranteed to be unique across the entirety of the /// execution. This is achieved by using characters in the variable name /// that would not be valid input, and by including a counter that is /// incremented on every invocation. fn gensym(name: &str) -> ArcIntern { static COUNTER: AtomicUsize = AtomicUsize::new(0); let new_name = format!( "<{}:{}>", name, COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst) ); ArcIntern::new(new_name) } proptest::proptest! { #[test] fn translation_maintains_semantics(input: syntax::Program) { let syntax_result = input.eval(); let ir = ir::Program::from(input); let ir_result = ir.eval(); assert_eq!(syntax_result, ir_result); } }