📜 Add better documentation across the compiler. (#3)
These changes pay particular attention to API endpoints, to try to ensure that any rustdocs generated are detailed and sensible. A good next step, eventually, might be to include doctest examples, as well. For the moment, it's not clear that they would provide a lot of value, though. In addition, this does a couple refactors to simplify the code base in ways that make things clearer or, at least, briefer.
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
use crate::syntax::Location;
|
||||
use internment::ArcIntern;
|
||||
use pretty::{DocAllocator, Pretty};
|
||||
use proptest::{
|
||||
@@ -5,13 +6,28 @@ use proptest::{
|
||||
strategy::{BoxedStrategy, Strategy},
|
||||
};
|
||||
|
||||
use crate::syntax::Location;
|
||||
|
||||
/// We're going to represent variables as interned strings.
|
||||
///
|
||||
/// These should be fast enough for comparison that it's OK, since it's going to end up
|
||||
/// being pretty much the pointer to the string.
|
||||
type Variable = ArcIntern<String>;
|
||||
|
||||
/// The representation of a program within our IR. For now, this is exactly one file.
|
||||
///
|
||||
/// In addition, for the moment there's not really much of interest to hold here besides
|
||||
/// the list of statements read from the file. Order is important. In the future, you
|
||||
/// could imagine caching analysis information in this structure.
|
||||
///
|
||||
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
|
||||
/// to print the structure whenever possible, especially if you value your or your
|
||||
/// user's time. The latter is useful for testing that conversions of `Program` retain
|
||||
/// their meaning. All `Program`s generated through [`Arbitrary`] are guaranteed to be
|
||||
/// syntactically valid, although they may contain runtime issues like over- or underflow.
|
||||
#[derive(Debug)]
|
||||
pub struct Program {
|
||||
pub statements: Vec<Statement>,
|
||||
// For now, a program is just a vector of statements. In the future, we'll probably
|
||||
// extend this to include a bunch of other information, but for now: just a list.
|
||||
pub(crate) statements: Vec<Statement>,
|
||||
}
|
||||
|
||||
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
|
||||
@@ -23,6 +39,8 @@ where
|
||||
let mut result = allocator.nil();
|
||||
|
||||
for stmt in self.statements.iter() {
|
||||
// there's probably a better way to do this, rather than constantly
|
||||
// adding to the end, but this works.
|
||||
result = result
|
||||
.append(stmt.pretty(allocator))
|
||||
.append(allocator.text(";"))
|
||||
@@ -39,11 +57,21 @@ impl Arbitrary for Program {
|
||||
|
||||
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
|
||||
crate::syntax::Program::arbitrary_with(args)
|
||||
.prop_map(|x| Program::from(x.simplify()))
|
||||
.prop_map(Program::from)
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
/// The representation of a statement in the language.
|
||||
///
|
||||
/// For now, this is either a binding site (`x = 4`) or a print statement
|
||||
/// (`print x`). Someday, though, more!
|
||||
///
|
||||
/// As with `Program`, this type implements [`Pretty`], which should
|
||||
/// be used to display the structure whenever possible. It does not
|
||||
/// implement [`Arbitrary`], though, mostly because it's slightly
|
||||
/// complicated to do so.
|
||||
///
|
||||
#[derive(Debug)]
|
||||
pub enum Statement {
|
||||
Binding(Location, Variable, Expression),
|
||||
@@ -71,6 +99,18 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// The representation of an expression.
|
||||
///
|
||||
/// Note that expressions, like everything else in this syntax tree,
|
||||
/// support [`Pretty`], and it's strongly encouraged that you use
|
||||
/// that trait/module when printing these structures.
|
||||
///
|
||||
/// Also, Expressions at this point in the compiler are explicitly
|
||||
/// defined so that they are *not* recursive. By this point, if an
|
||||
/// expression requires some other data (like, for example, invoking
|
||||
/// a primitive), any subexpressions have been bound to variables so
|
||||
/// that the referenced data will always either be a constant or a
|
||||
/// variable reference.
|
||||
#[derive(Debug)]
|
||||
pub enum Expression {
|
||||
Value(Location, Value),
|
||||
@@ -107,6 +147,12 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// A type representing the primitives allowed in the language.
|
||||
///
|
||||
/// Having this as an enumeration avoids a lot of "this should not happen"
|
||||
/// cases, but might prove to be cumbersome in the future. If that happens,
|
||||
/// this may either become a more hierarchical enumeration, or we'll just
|
||||
/// deal with the "this should not happen" cases.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum Primitive {
|
||||
Plus,
|
||||
@@ -144,6 +190,11 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// An expression that is always either a value or a reference.
|
||||
///
|
||||
/// This is the type used to guarantee that we don't nest expressions
|
||||
/// at this level. Instead, expressions that take arguments take one
|
||||
/// of these, which can only be a constant or a reference.
|
||||
#[derive(Debug)]
|
||||
pub enum ValueOrRef {
|
||||
Value(Location, Value),
|
||||
@@ -163,8 +214,23 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ValueOrRef> for Expression {
|
||||
fn from(value: ValueOrRef) -> Self {
|
||||
match value {
|
||||
ValueOrRef::Value(loc, val) => Expression::Value(loc, val),
|
||||
ValueOrRef::Ref(loc, var) => Expression::Reference(loc, var),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A constant in the IR.
|
||||
#[derive(Debug)]
|
||||
pub enum Value {
|
||||
/// A numerical constant.
|
||||
///
|
||||
/// The optional argument is the base that was used by the user to input
|
||||
/// the number. By retaining it, we can ensure that if we need to print the
|
||||
/// number back out, we can do so in the form that the user entered it.
|
||||
Number(Option<u8>, i64),
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,10 @@ use crate::ir::{Expression, Program, Statement};
|
||||
use super::{Primitive, ValueOrRef};
|
||||
|
||||
impl Program {
|
||||
/// Evaluate the program, returning either an error or a string containing everything
|
||||
/// the program printed out.
|
||||
///
|
||||
/// The print outs will be newline separated, with one print out per line.
|
||||
pub fn eval(&self) -> Result<String, EvalError> {
|
||||
let mut env = EvalEnvironment::empty();
|
||||
let mut stdout = String::new();
|
||||
@@ -39,6 +43,9 @@ impl Expression {
|
||||
Expression::Primitive(_, op, args) => {
|
||||
let mut arg_values = Vec::with_capacity(args.len());
|
||||
|
||||
// we implement primitive operations by first evaluating each of the
|
||||
// arguments to the function, and then gathering up all the values
|
||||
// produced.
|
||||
for arg in args.iter() {
|
||||
match arg {
|
||||
ValueOrRef::Ref(_, n) => arg_values.push(env.lookup(n.clone())?),
|
||||
@@ -48,6 +55,8 @@ impl Expression {
|
||||
}
|
||||
}
|
||||
|
||||
// and then finally we call `calculate` to run them. trust me, it's nice
|
||||
// to not have to deal with all the nonsense hidden under `calculate`.
|
||||
match op {
|
||||
Primitive::Plus => Ok(Value::calculate("+", arg_values)?),
|
||||
Primitive::Minus => Ok(Value::calculate("-", arg_values)?),
|
||||
@@ -62,7 +71,7 @@ impl Expression {
|
||||
#[test]
|
||||
fn two_plus_three() {
|
||||
let input = crate::syntax::Program::parse(0, "x = 2 + 3; print x;").expect("parse works");
|
||||
let ir = Program::from(input.simplify());
|
||||
let ir = Program::from(input);
|
||||
let output = ir.eval().expect("runs successfully");
|
||||
assert_eq!("x = 5i64\n", &output);
|
||||
}
|
||||
@@ -71,7 +80,7 @@ fn two_plus_three() {
|
||||
fn lotsa_math() {
|
||||
let input =
|
||||
crate::syntax::Program::parse(0, "x = 2 + 3 * 10 / 5 - 1; print x;").expect("parse works");
|
||||
let ir = Program::from(input.simplify());
|
||||
let ir = Program::from(input);
|
||||
let output = ir.eval().expect("runs successfully");
|
||||
assert_eq!("x = 7i64\n", &output);
|
||||
}
|
||||
|
||||
@@ -1,82 +1,185 @@
|
||||
use internment::ArcIntern;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
|
||||
use crate::ir::ast as ir;
|
||||
use crate::syntax::ast as syntax;
|
||||
use crate::syntax;
|
||||
|
||||
use super::ValueOrRef;
|
||||
|
||||
impl From<syntax::Program> for ir::Program {
|
||||
/// We implement the top-level conversion of a syntax::Program into an
|
||||
/// ir::Program using just the standard `From::from`, because we don't
|
||||
/// need to return any arguments and we shouldn't produce any errors.
|
||||
/// Technically there's an `unwrap` deep under the hood that we could
|
||||
/// float out, but the validator really should've made sure that never
|
||||
/// happens, so we're just going to assume.
|
||||
fn from(mut value: syntax::Program) -> Self {
|
||||
ir::Program {
|
||||
statements: value.statements.drain(..).map(Into::into).collect(),
|
||||
let mut statements = Vec::new();
|
||||
|
||||
for stmt in value.statements.drain(..) {
|
||||
statements.append(&mut stmt.simplify());
|
||||
}
|
||||
|
||||
ir::Program { statements }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<syntax::Statement>> for ir::Program {
|
||||
fn from(mut value: Vec<syntax::Statement>) -> Self {
|
||||
ir::Program {
|
||||
statements: value.drain(..).map(Into::into).collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<syntax::Statement> for ir::Statement {
|
||||
impl From<syntax::Statement> for ir::Program {
|
||||
/// One interesting thing about this conversion is that there isn't
|
||||
/// a natural translation from syntax::Statement to ir::Statement,
|
||||
/// because the syntax version can have nested expressions and the
|
||||
/// IR version can't.
|
||||
///
|
||||
/// As a result, we can naturally convert a syntax::Statement into
|
||||
/// an ir::Program, because we can allow the additional binding
|
||||
/// sites to be generated, instead. And, bonus, it turns out that
|
||||
/// this is what we wanted anyways.
|
||||
fn from(value: syntax::Statement) -> Self {
|
||||
match value {
|
||||
syntax::Statement::Binding(loc, name, expr) => {
|
||||
ir::Statement::Binding(loc, ArcIntern::from(name), ir::Expression::from(expr))
|
||||
}
|
||||
syntax::Statement::Print(loc, name) => ir::Statement::Print(loc, ArcIntern::from(name)),
|
||||
ir::Program {
|
||||
statements: value.simplify(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<syntax::Expression> for ir::Expression {
|
||||
fn from(value: syntax::Expression) -> Self {
|
||||
match value {
|
||||
syntax::Expression::Primitive(loc, name, mut exprs) => ir::Expression::Primitive(
|
||||
loc,
|
||||
ir::Primitive::try_from(name.as_str()).unwrap(),
|
||||
exprs.drain(..).map(Into::into).collect(),
|
||||
),
|
||||
impl syntax::Statement {
|
||||
/// Simplify a syntax::Statement into a series of ir::Statements.
|
||||
///
|
||||
/// The reason this function is one-to-many is because we may have to
|
||||
/// introduce new binding sites in order to avoid having nested
|
||||
/// expressions. Nested expressions, like `(1 + 2) * 3`, are allowed
|
||||
/// in syntax::Expression but are expressly *not* allowed in
|
||||
/// ir::Expression. So this pass converts them into bindings, like
|
||||
/// this:
|
||||
///
|
||||
/// x = (1 + 2) * 3;
|
||||
///
|
||||
/// ==>
|
||||
///
|
||||
/// x:1 = 1 + 2;
|
||||
/// x:2 = x:1 * 3;
|
||||
/// x = x:2
|
||||
///
|
||||
/// Thus ensuring that things are nice and simple. Note that the
|
||||
/// binding of `x:2` is not, strictly speaking, necessary, but it
|
||||
/// makes the code below much easier to read.
|
||||
fn simplify(self) -> Vec<ir::Statement> {
|
||||
let mut new_statements = vec![];
|
||||
|
||||
match self {
|
||||
// Print statements we don't have to do much with
|
||||
syntax::Statement::Print(loc, name) => {
|
||||
new_statements.push(ir::Statement::Print(loc, ArcIntern::new(name)))
|
||||
}
|
||||
|
||||
// Bindings, however, may involve a single expression turning into
|
||||
// a series of statements and then an expression.
|
||||
syntax::Statement::Binding(loc, name, value) => {
|
||||
let (mut prereqs, new_value) = value.rebind(&name);
|
||||
new_statements.append(&mut prereqs);
|
||||
new_statements.push(ir::Statement::Binding(
|
||||
loc,
|
||||
ArcIntern::new(name),
|
||||
new_value.into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
new_statements
|
||||
}
|
||||
}
|
||||
|
||||
impl syntax::Expression {
|
||||
/// This actually does the meat of the simplification work, here, by rebinding
|
||||
/// any nested expressions into their own variables. We have this return
|
||||
/// `ValueOrRef` in all cases because it makes for slightly less code; in the
|
||||
/// case when we actually want an `Expression`, we can just use `into()`.
|
||||
fn rebind(self, base_name: &str) -> (Vec<ir::Statement>, ir::ValueOrRef) {
|
||||
match self {
|
||||
// Values just convert in the obvious way, and require no prereqs
|
||||
syntax::Expression::Value(loc, val) => (vec![], ValueOrRef::Value(loc, val.into())),
|
||||
|
||||
// Similarly, references just convert in the obvious way, and require
|
||||
// no prereqs
|
||||
syntax::Expression::Reference(loc, name) => {
|
||||
ir::Expression::Reference(loc, ArcIntern::from(name))
|
||||
}
|
||||
syntax::Expression::Value(loc, value) => {
|
||||
ir::Expression::Value(loc, ir::Value::from(value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<syntax::Expression> for ir::ValueOrRef {
|
||||
fn from(value: syntax::Expression) -> Self {
|
||||
match value {
|
||||
syntax::Expression::Primitive(loc, _, _) => {
|
||||
panic!("{:?}: couldn't convert to valueorref", loc)
|
||||
(vec![], ValueOrRef::Ref(loc, ArcIntern::new(name)))
|
||||
}
|
||||
|
||||
syntax::Expression::Reference(loc, var) => {
|
||||
ir::ValueOrRef::Ref(loc, ArcIntern::new(var))
|
||||
}
|
||||
// Primitive expressions are where we do the real work.
|
||||
syntax::Expression::Primitive(loc, prim, mut expressions) => {
|
||||
// generate a fresh new name for the binding site we're going to
|
||||
// introduce, basing the name on wherever we came from; so if this
|
||||
// expression was bound to `x` originally, it might become `x:23`.
|
||||
//
|
||||
// gensym is guaranteed to give us a name that is unused anywhere
|
||||
// else in the program.
|
||||
let new_name = gensym(base_name);
|
||||
let mut prereqs = Vec::new();
|
||||
let mut new_exprs = Vec::new();
|
||||
|
||||
syntax::Expression::Value(loc, val) => ir::ValueOrRef::Value(loc, val.into()),
|
||||
// here we loop through every argument, and recurse on the expressions
|
||||
// we find. that will give us any new binding sites that *they* introduce,
|
||||
// and a simple value or reference that we can use in our result.
|
||||
for expr in expressions.drain(..) {
|
||||
let (mut cur_prereqs, arg) = expr.rebind(new_name.as_str());
|
||||
prereqs.append(&mut cur_prereqs);
|
||||
new_exprs.push(arg);
|
||||
}
|
||||
|
||||
// now we're going to use those new arguments to run the primitive, binding
|
||||
// the results to the new variable we introduced.
|
||||
let prim =
|
||||
ir::Primitive::try_from(prim.as_str()).expect("is valid primitive function");
|
||||
prereqs.push(ir::Statement::Binding(
|
||||
loc.clone(),
|
||||
new_name.clone(),
|
||||
ir::Expression::Primitive(loc.clone(), prim, new_exprs),
|
||||
));
|
||||
|
||||
// and finally, we can return all the new bindings, and a reference to
|
||||
// the variable we just introduced to hold the value of the primitive
|
||||
// invocation.
|
||||
(prereqs, ValueOrRef::Ref(loc, new_name))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<syntax::Value> for ir::Value {
|
||||
fn from(x: syntax::Value) -> Self {
|
||||
match x {
|
||||
syntax::Value::Number(base, value) => ir::Value::Number(base, value),
|
||||
fn from(value: syntax::Value) -> Self {
|
||||
match value {
|
||||
syntax::Value::Number(base, val) => ir::Value::Number(base, val),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for ir::Primitive {
|
||||
fn from(value: String) -> Self {
|
||||
value.try_into().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a fresh new name based on the given name.
|
||||
///
|
||||
/// The new name is guaranteed to be unique across the entirety of the
|
||||
/// execution. This is achieved by using characters in the variable name
|
||||
/// that would not be valid input, and by including a counter that is
|
||||
/// incremented on every invocation.
|
||||
fn gensym(name: &str) -> ArcIntern<String> {
|
||||
static COUNTER: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
let new_name = format!(
|
||||
"<{}:{}>",
|
||||
name,
|
||||
COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
|
||||
);
|
||||
ArcIntern::new(new_name)
|
||||
}
|
||||
|
||||
proptest::proptest! {
|
||||
#[test]
|
||||
fn translation_maintains_semantics(input: syntax::Program) {
|
||||
let syntax_result = input.eval();
|
||||
let ir = ir::Program::from(input.simplify());
|
||||
let ir = ir::Program::from(input);
|
||||
let ir_result = ir.eval();
|
||||
assert_eq!(syntax_result, ir_result);
|
||||
}
|
||||
|
||||
@@ -3,6 +3,10 @@ use internment::ArcIntern;
|
||||
use std::collections::HashSet;
|
||||
|
||||
impl Program {
|
||||
/// Get the complete list of strings used within the program.
|
||||
///
|
||||
/// For the purposes of this function, strings are the variables used in
|
||||
/// `print` statements.
|
||||
pub fn strings(&self) -> HashSet<ArcIntern<String>> {
|
||||
let mut result = HashSet::new();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user