// ngr/src/ir/ast.rs

use crate::syntax::Location;
use internment::ArcIntern;
use pretty::{DocAllocator, Pretty};
use proptest::{
    prelude::Arbitrary,
    strategy::{BoxedStrategy, Strategy},
};

/// Variables are represented as interned strings.
///
/// Interning should make comparisons fast enough for our purposes, since
/// comparing two interned strings ends up being pretty much a comparison of
/// the pointers to the strings.
type Variable = ArcIntern<String>;

/// The representation of a program within our IR. For now, this is exactly one file.
///
/// In addition, for the moment there's not really much of interest to hold here besides
/// the list of statements read from the file. Order is important. In the future, you
/// could imagine caching analysis information in this structure.
///
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
/// to print the structure whenever possible, especially if you value your or your
/// user's time. The latter is useful for testing that conversions of `Program` retain
/// their meaning. All `Program`s generated through [`Arbitrary`] are guaranteed to be
/// syntactically valid, although they may contain runtime issues such as over- or
/// underflow.
#[derive(Debug)]
pub struct Program {
    // The statements of the program, in order. For now this is all a program
    // holds; in the future we'll probably extend this structure with other
    // information.
    pub(crate) statements: Vec<Statement>,
}

/// Pretty-prints a program as its statements, in order, each one followed by
/// a `;` and a hard line break.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        // Start from the empty document and tack each rendered statement onto
        // the end of what has been built so far.
        self.statements
            .iter()
            .fold(allocator.nil(), |document, statement| {
                document
                    .append(statement.pretty(allocator))
                    .append(allocator.text(";"))
                    .append(allocator.hardline())
            })
    }
}

/// Generates IR programs by generating arbitrary `crate::syntax::Program`
/// values and converting each of them with `Program::from`.
impl Arbitrary for Program {
    type Parameters = ();
    type Strategy = BoxedStrategy<Self>;

    fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
        let syntax_programs = crate::syntax::Program::arbitrary_with(args);

        syntax_programs.prop_map(Self::from).boxed()
    }
}

/// The representation of a statement in the language.
///
/// For now, this is either a binding site (`x = 4`) or a print statement
/// (`print x`). Someday, though, more!
///
/// As with `Program`, this type implements [`Pretty`], which should
/// be used to display the structure whenever possible. It does not
/// implement [`Arbitrary`], though, mostly because it's slightly
/// complicated to do so.
#[derive(Debug)]
pub enum Statement {
    /// A binding site, such as `x = 4`: the variable being bound and the
    /// expression bound to it, along with a [`Location`].
    Binding(Location, Variable, Expression),
    /// A print statement, such as `print x`: the variable to print, along
    /// with a [`Location`].
    Print(Location, Variable),
}

/// Pretty-prints a statement: bindings as `<variable> = <expression>` and
/// print statements as `print <variable>`.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Statement
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            Statement::Binding(_, target, bound) => {
                let name = allocator.text(target.as_ref().to_string());
                let rendered = bound.pretty(allocator);

                name.append(allocator.space())
                    .append(allocator.text("="))
                    .append(allocator.space())
                    .append(rendered)
            }
            Statement::Print(_, target) => {
                let name = allocator.text(target.as_ref().to_string());

                allocator
                    .text("print")
                    .append(allocator.space())
                    .append(name)
            }
        }
    }
}

/// The representation of an expression.
///
/// Note that expressions, like everything else in this syntax tree,
/// support [`Pretty`], and it's strongly encouraged that you use
/// that trait/module when printing these structures.
///
/// Also, expressions at this point in the compiler are explicitly
/// defined so that they are *not* recursive. By this point, if an
/// expression requires some other data (like, for example, invoking
/// a primitive), any subexpressions have been bound to variables so
/// that the referenced data will always either be a constant or a
/// variable reference.
#[derive(Debug)]
pub enum Expression {
    /// A constant value.
    Value(Location, Value),
    /// A reference to a variable.
    Reference(Location, Variable),
    /// A primitive applied to its arguments. Each argument is a
    /// [`ValueOrRef`], never a nested expression.
    Primitive(Location, Primitive, Vec<ValueOrRef>),
}

/// Pretty-prints an expression.
///
/// Constants and references print as themselves. A primitive with one
/// argument prints as the operator immediately followed by the argument; a
/// primitive with two arguments prints infix, wrapped in parentheses. Any
/// other argument count prints a `!!...!!` marker naming the primitive and
/// the number of arguments.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Expression
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            Expression::Value(_, constant) => constant.pretty(allocator),
            Expression::Reference(_, name) => allocator.text(name.as_ref().to_string()),
            // Dispatch on the shape of the argument list rather than checking
            // its length and then indexing into it.
            Expression::Primitive(_, op, args) => match args.as_slice() {
                [operand] => op.pretty(allocator).append(operand.pretty(allocator)),
                [lhs, rhs] => {
                    let left = lhs.pretty(allocator);
                    let right = rhs.pretty(allocator);

                    left.append(allocator.space())
                        .append(op.pretty(allocator))
                        .append(allocator.space())
                        .append(right)
                        .parens()
                }
                _ => allocator.text(format!("!!{:?} with {} arguments!!", op, args.len())),
            },
        }
    }
}

/// The primitive operations allowed in the language.
///
/// Modelling these as an enumeration avoids a lot of "this should not
/// happen" cases, but might prove to be cumbersome in the future. If that
/// happens, this may either become a more hierarchical enumeration, or we'll
/// just deal with the "this should not happen" cases.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Primitive {
    /// Written `+`.
    Plus,
    /// Written `-`.
    Minus,
    /// Written `*`.
    Times,
    /// Written `/`.
    Divide,
}

/// Converts the textual form of an operator into the matching [`Primitive`].
///
/// Exactly `+`, `-`, `*` and `/` are recognized. Any other string is
/// rejected with an error message that includes the offending text.
impl<'a> TryFrom<&'a str> for Primitive {
    type Error = String;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        let primitive = match value {
            "+" => Self::Plus,
            "-" => Self::Minus,
            "*" => Self::Times,
            "/" => Self::Divide,
            _ => return Err(format!("Illegal primitive {}", value)),
        };

        Ok(primitive)
    }
}

/// Pretty-prints a primitive as its operator symbol: `+`, `-`, `*` or `/`.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Primitive
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        // Choose the symbol first so the document is built in one place.
        let symbol = match self {
            Primitive::Plus => "+",
            Primitive::Minus => "-",
            Primitive::Times => "*",
            Primitive::Divide => "/",
        };

        allocator.text(symbol)
    }
}

/// An expression that is always either a value or a reference.
///
/// This is the type used to guarantee that we don't nest expressions
/// at this level. Instead, expressions that take arguments take one
/// of these, which can only be a constant or a reference.
#[derive(Debug)]
pub enum ValueOrRef {
    Value(Location, Value),
    Ref(Location, ArcIntern<String>),
}

/// Pretty-prints a constant using its own printer, and a reference as the
/// name of the variable it refers to.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b ValueOrRef
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            ValueOrRef::Value(_, constant) => constant.pretty(allocator),
            ValueOrRef::Ref(_, name) => {
                let rendered_name = name.as_ref().to_string();

                allocator.text(rendered_name)
            }
        }
    }
}

/// Lifts a [`ValueOrRef`] into the corresponding [`Expression`]: constants
/// become `Expression::Value` and references become `Expression::Reference`,
/// keeping the location in both cases.
impl From<ValueOrRef> for Expression {
    fn from(value: ValueOrRef) -> Self {
        match value {
            ValueOrRef::Value(location, constant) => Self::Value(location, constant),
            ValueOrRef::Ref(location, name) => Self::Reference(location, name),
        }
    }
}

/// A constant in the IR.
///
/// At the moment, the only kind of constant is a number.
#[derive(Debug)]
pub enum Value {
    /// A numerical constant.
    ///
    /// The first field is the (optional) base that was used by the user to
    /// input the number; the second field is the number itself. By retaining
    /// the base, we can ensure that if we need to print the number back out,
    /// we can do so in the form that the user entered it.
    Number(Option<u8>, i64),
}

/// Pretty-prints a constant.
///
/// Numbers without a recorded base print in plain decimal. Numbers with a
/// recorded base of 2, 8, 10 or 16 print with the matching `0b`, `0o`, `0d`
/// or `0x` prefix. Any other recorded base prints a `!!...!!` marker.
///
/// Negative numbers print as a leading `-`, then the prefix, then the digits
/// of the magnitude (for example `-0x1f`). Formatting a negative `i64`
/// directly with `{:b}`, `{:o}` or `{:x}` would instead emit its
/// two's-complement bit pattern, which is not what the user wrote.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Value
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            Value::Number(opt_base, value) => {
                let sign = if *value < 0 { "-" } else { "" };
                // `unsigned_abs` rather than `abs`, so that `i64::MIN` does
                // not overflow.
                let magnitude = value.unsigned_abs();

                let value_str = match opt_base {
                    None => format!("{}", value),
                    Some(2) => format!("{}0b{:b}", sign, magnitude),
                    Some(8) => format!("{}0o{:o}", sign, magnitude),
                    Some(10) => format!("{}0d{}", sign, magnitude),
                    Some(16) => format!("{}0x{:x}", sign, magnitude),
                    // An unexpected base: keep the original diagnostic marker.
                    Some(_) => format!("!!{:x}!!", value),
                };

                allocator.text(value_str)
            }
        }
    }
}