Clean ups, comments on from_syntax.
This commit is contained in:
@@ -7,6 +7,12 @@ use crate::syntax;
|
|||||||
use super::ValueOrRef;
|
use super::ValueOrRef;
|
||||||
|
|
||||||
impl From<syntax::Program> for ir::Program {
|
impl From<syntax::Program> for ir::Program {
|
||||||
|
/// We implement the top-level conversion of a syntax::Program into an
|
||||||
|
/// ir::Program using just the standard `From::from`, because we don't
|
||||||
|
/// need to return any arguments and we shouldn't produce any errors.
|
||||||
|
/// Technically there's an `unwrap` deep under the hood that we could
|
||||||
|
/// float out, but the validator really should've made sure that never
|
||||||
|
/// happens, so we're just going to assume it can't.
|
||||||
fn from(mut value: syntax::Program) -> Self {
|
fn from(mut value: syntax::Program) -> Self {
|
||||||
let mut statements = Vec::new();
|
let mut statements = Vec::new();
|
||||||
|
|
||||||
@@ -19,6 +25,15 @@ impl From<syntax::Program> for ir::Program {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl From<syntax::Statement> for ir::Program {
|
impl From<syntax::Statement> for ir::Program {
|
||||||
|
/// One interesting thing about this conversion is that there isn't
|
||||||
|
/// a natural translation from syntax::Statement to ir::Statement,
|
||||||
|
/// because the syntax version can have nested expressions and the
|
||||||
|
/// IR version can't.
|
||||||
|
///
|
||||||
|
/// As a result, we can naturally convert a syntax::Statement into
|
||||||
|
/// an ir::Program, because we can allow the additional binding
|
||||||
|
/// sites to be generated, instead. And, bonus, it turns out that
|
||||||
|
/// this is what we wanted anyways.
|
||||||
fn from(value: syntax::Statement) -> Self {
|
fn from(value: syntax::Statement) -> Self {
|
||||||
ir::Program {
|
ir::Program {
|
||||||
statements: value.simplify(),
|
statements: value.simplify(),
|
||||||
@@ -27,17 +42,45 @@ impl From<syntax::Statement> for ir::Program {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl syntax::Statement {
|
impl syntax::Statement {
|
||||||
|
/// Simplify a syntax::Statement into a series of ir::Statements.
|
||||||
|
///
|
||||||
|
/// This function is one-to-many because we may have to
|
||||||
|
/// introduce new binding sites in order to avoid having nested
|
||||||
|
/// expressions. Nested expressions, like `(1 + 2) * 3`, are allowed
|
||||||
|
/// in syntax::Expression but are expressly *not* allowed in
|
||||||
|
/// ir::Expression. So this pass converts them into bindings, like
|
||||||
|
/// this:
|
||||||
|
///
|
||||||
|
/// x = (1 + 2) * 3;
|
||||||
|
///
|
||||||
|
/// ==>
|
||||||
|
///
|
||||||
|
/// x:1 = 1 + 2;
|
||||||
|
/// x:2 = x:1 * 3;
|
||||||
|
/// x = x:2
|
||||||
|
///
|
||||||
|
/// Thus ensuring that things are nice and simple. Note that the
|
||||||
|
/// binding of `x:2` is not, strictly speaking, necessary, but it
|
||||||
|
/// makes the code below much easier to read.
|
||||||
fn simplify(self) -> Vec<ir::Statement> {
|
fn simplify(self) -> Vec<ir::Statement> {
|
||||||
let mut new_statements = vec![];
|
let mut new_statements = vec![];
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
|
// Print statements we don't have to do much with
|
||||||
syntax::Statement::Print(loc, name) => {
|
syntax::Statement::Print(loc, name) => {
|
||||||
new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name)))
|
new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bindings, however, may involve a single expression turning into
|
||||||
|
// a series of statements and then an expression.
|
||||||
syntax::Statement::Binding(loc, name, value) => {
|
syntax::Statement::Binding(loc, name, value) => {
|
||||||
let (mut prereqs, new_value) = value.simplify(&name);
|
let (mut prereqs, new_value) = value.rebind(&name);
|
||||||
new_statements.append(&mut prereqs);
|
new_statements.append(&mut prereqs);
|
||||||
new_statements.push(ir::Statement::Binding(loc, ArcIntern::new(name), new_value))
|
new_statements.push(ir::Statement::Binding(
|
||||||
|
loc,
|
||||||
|
ArcIntern::new(name),
|
||||||
|
new_value.into(),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -46,43 +89,55 @@ impl syntax::Statement {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl syntax::Expression {
|
impl syntax::Expression {
|
||||||
fn simplify(self, base_name: &str) -> (Vec<ir::Statement>, ir::Expression) {
|
/// This actually does the meat of the simplification work, here, by rebinding
|
||||||
match self {
|
/// any nested expressions into their own variables. We have this return
|
||||||
syntax::Expression::Value(loc, val) => (vec![], ir::Expression::Value(loc, val.into())),
|
/// `ValueOrRef` in all cases because it makes for slightly less code; in the
|
||||||
syntax::Expression::Reference(loc, name) => {
|
/// case when we actually want an `Expression`, we can just use `into()`.
|
||||||
(vec![], ir::Expression::Reference(loc, ArcIntern::new(name)))
|
|
||||||
}
|
|
||||||
syntax::Expression::Primitive(_, _, _) => {
|
|
||||||
let (prereqs, val_or_ref) = self.rebind(base_name);
|
|
||||||
(prereqs, val_or_ref.into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn rebind(self, base_name: &str) -> (Vec<ir::Statement>, ir::ValueOrRef) {
|
fn rebind(self, base_name: &str) -> (Vec<ir::Statement>, ir::ValueOrRef) {
|
||||||
match self {
|
match self {
|
||||||
|
// Values just convert in the obvious way, and require no prereqs
|
||||||
syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())),
|
syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())),
|
||||||
|
|
||||||
|
// Similarly, references just convert in the obvious way, and require
|
||||||
|
// no prereqs
|
||||||
syntax::Expression::Reference(loc, name) => {
|
syntax::Expression::Reference(loc, name) => {
|
||||||
(vec![], ValueOrRef::Ref(loc, ArcIntern::new(name)))
|
(vec![], ValueOrRef::Ref(loc, ArcIntern::new(name)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Primitive expressions are where we do the real work.
|
||||||
syntax::Expression::Primitive(loc, prim, mut expressions) => {
|
syntax::Expression::Primitive(loc, prim, mut expressions) => {
|
||||||
|
// generate a fresh new name for the binding site we're going to
|
||||||
|
// introduce, basing the name on wherever we came from; so if this
|
||||||
|
// expression was bound to `x` originally, it might become `x:23`.
|
||||||
|
//
|
||||||
|
// gensym is guaranteed to give us a name that is unused anywhere
|
||||||
|
// else in the program.
|
||||||
let new_name = gensym(base_name);
|
let new_name = gensym(base_name);
|
||||||
let mut prereqs = Vec::new();
|
let mut prereqs = Vec::new();
|
||||||
let mut new_exprs = Vec::new();
|
let mut new_exprs = Vec::new();
|
||||||
|
|
||||||
|
// here we loop through every argument, and recurse on the expressions
|
||||||
|
// we find. that will give us any new binding sites that *they* introduce,
|
||||||
|
// and a simple value or reference that we can use in our result.
|
||||||
for expr in expressions.drain(..) {
|
for expr in expressions.drain(..) {
|
||||||
let (mut cur_prereqs, arg) = expr.rebind(base_name);
|
let (mut cur_prereqs, arg) = expr.rebind(new_name.as_str());
|
||||||
prereqs.append(&mut cur_prereqs);
|
prereqs.append(&mut cur_prereqs);
|
||||||
new_exprs.push(arg);
|
new_exprs.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
let prim = ir::Primitive::try_from(prim.as_str()).unwrap();
|
// now we're going to use those new arguments to run the primitive, binding
|
||||||
|
// the results to the new variable we introduced.
|
||||||
|
let prim =
|
||||||
|
ir::Primitive::try_from(prim.as_str()).expect("is valid primitive function");
|
||||||
prereqs.push(ir::Statement::Binding(
|
prereqs.push(ir::Statement::Binding(
|
||||||
loc.clone(),
|
loc.clone(),
|
||||||
new_name.clone(),
|
new_name.clone(),
|
||||||
ir::Expression::Primitive(loc.clone(), prim, new_exprs),
|
ir::Expression::Primitive(loc.clone(), prim, new_exprs),
|
||||||
));
|
));
|
||||||
|
|
||||||
|
// and finally, we can return all the new bindings, and a reference to
|
||||||
|
// the variable we just introduced to hold the value of the primitive
|
||||||
|
// invocation.
|
||||||
(prereqs, ValueOrRef::Ref(loc, new_name))
|
(prereqs, ValueOrRef::Ref(loc, new_name))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -103,6 +158,12 @@ impl From<String> for ir::Primitive {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generate a fresh new name based on the given name.
|
||||||
|
///
|
||||||
|
/// The new name is guaranteed to be unique across the entirety of the
|
||||||
|
/// execution. This is achieved by using characters in the variable name
|
||||||
|
/// that would not be valid input, and by including a counter that is
|
||||||
|
/// incremented on every invocation.
|
||||||
fn gensym(name: &str) -> ArcIntern<String> {
|
fn gensym(name: &str) -> ArcIntern<String> {
|
||||||
static COUNTER: AtomicUsize = AtomicUsize::new(0);
|
static COUNTER: AtomicUsize = AtomicUsize::new(0);
|
||||||
|
|
||||||
|
|||||||
@@ -12,9 +12,6 @@
|
|||||||
//! by [`lalrpop`](https://lalrpop.github.io/lalrpop/)).
|
//! by [`lalrpop`](https://lalrpop.github.io/lalrpop/)).
|
||||||
//! * Validating the tree we have parsed, using the [`validate`] module,
|
//! * Validating the tree we have parsed, using the [`validate`] module,
|
||||||
//! returning any warnings or errors we have found.
|
//! returning any warnings or errors we have found.
|
||||||
//! * Simplifying the tree we have parsed, using the [`simplify`] module,
|
|
||||||
//! into something that's more easily turned into our [compiler internal
|
|
||||||
//! representation](super::ir).
|
|
||||||
//!
|
//!
|
||||||
//! In addition to all of this, we make sure that the structures defined in this
|
//! In addition to all of this, we make sure that the structures defined in this
|
||||||
//! module are all:
|
//! module are all:
|
||||||
|
|||||||
Reference in New Issue
Block a user