Files
ngr/src/ir/ast.rs
Adam Wick 1fbfd0c2d2 📜 Add better documentation across the compiler. (#3)
These changes pay particular attention to API endpoints, to try to
ensure that any rustdocs generated are detailed and sensible. A good
next step, eventually, might be to include doctest examples, as well.
For the moment, it's not clear that they would provide a lot of value,
though.

In addition, this does a couple refactors to simplify the code base in
ways that make things clearer or, at least, briefer.
2023-05-13 15:00:08 -05:00

259 lines
8.6 KiB
Rust

use crate::syntax::Location;
use internment::ArcIntern;
use pretty::{DocAllocator, Pretty};
use proptest::{
prelude::Arbitrary,
strategy::{BoxedStrategy, Strategy},
};
/// We're going to represent variables as interned strings.
///
/// These should be fast enough for comparison that it's OK, since it's going to end up
/// being pretty much the pointer to the string.
type Variable = ArcIntern<String>;
/// The representation of a program within our IR. For now, this is exactly one file.
///
/// In addition, for the moment there's not really much of interest to hold here besides
/// the list of statements read from the file. Order is important. In the future, you
/// could imagine caching analysis information in this structure.
///
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
/// to print the structure whenever possible, especially if you value your or your
/// user's time. The latter is useful for testing that conversions of `Program` retain
/// their meaning. All `Program`s generated through [`Arbitrary`] are guaranteed to be
/// syntactically valid, although they may contain runtime issue like over- or underflow.
#[derive(Debug)]
pub struct Program {
// For now, a program is just a vector of statements. In the future, we'll probably
// extend this to include a bunch of other information, but for now: just a list.
pub(crate) statements: Vec<Statement>,
}
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
where
A: 'a,
D: ?Sized + DocAllocator<'a, A>,
{
fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
let mut result = allocator.nil();
for stmt in self.statements.iter() {
// there's probably a better way to do this, rather than constantly
// adding to the end, but this works.
result = result
.append(stmt.pretty(allocator))
.append(allocator.text(";"))
.append(allocator.hardline());
}
result
}
}
impl Arbitrary for Program {
type Parameters = ();
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
crate::syntax::Program::arbitrary_with(args)
.prop_map(Program::from)
.boxed()
}
}
/// The representation of a statement in the language.
///
/// For now, this is either a binding site (`x = 4`) or a print statement
/// (`print x`). Someday, though, more!
///
/// As with `Program`, this type implements [`Pretty`], which should
/// be used to display the structure whenever possible. It does not
/// implement [`Arbitrary`], though, mostly because it's slightly
/// complicated to do so.
///
#[derive(Debug)]
pub enum Statement {
Binding(Location, Variable, Expression),
Print(Location, Variable),
}
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Statement
where
A: 'a,
D: ?Sized + DocAllocator<'a, A>,
{
fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
match self {
Statement::Binding(_, var, expr) => allocator
.text(var.as_ref().to_string())
.append(allocator.space())
.append(allocator.text("="))
.append(allocator.space())
.append(expr.pretty(allocator)),
Statement::Print(_, var) => allocator
.text("print")
.append(allocator.space())
.append(allocator.text(var.as_ref().to_string())),
}
}
}
/// The representation of an expression.
///
/// Note that expressions, like everything else in this syntax tree,
/// supports [`Pretty`], and it's strongly encouraged that you use
/// that trait/module when printing these structures.
///
/// Also, Expressions at this point in the compiler are explicitly
/// defined so that they are *not* recursive. By this point, if an
/// expression requires some other data (like, for example, invoking
/// a primitive), any subexpressions have been bound to variables so
/// that the referenced data will always either be a constant or a
/// variable reference.
#[derive(Debug)]
pub enum Expression {
Value(Location, Value),
Reference(Location, Variable),
Primitive(Location, Primitive, Vec<ValueOrRef>),
}
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Expression
where
A: 'a,
D: ?Sized + DocAllocator<'a, A>,
{
fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
match self {
Expression::Value(_, val) => val.pretty(allocator),
Expression::Reference(_, var) => allocator.text(var.as_ref().to_string()),
Expression::Primitive(_, op, exprs) if exprs.len() == 1 => {
op.pretty(allocator).append(exprs[0].pretty(allocator))
}
Expression::Primitive(_, op, exprs) if exprs.len() == 2 => {
let left = exprs[0].pretty(allocator);
let right = exprs[1].pretty(allocator);
left.append(allocator.space())
.append(op.pretty(allocator))
.append(allocator.space())
.append(right)
.parens()
}
Expression::Primitive(_, op, exprs) => {
allocator.text(format!("!!{:?} with {} arguments!!", op, exprs.len()))
}
}
}
}
/// A type representing the primitives allowed in the language.
///
/// Having this as an enumeration avoids a lot of "this should not happen"
/// cases, but might prove to be cumbersome in the future. If that happens,
/// this may either become a more hierarchical enumeration, or we'll just
/// deal with the "this should not happen" cases.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Primitive {
Plus,
Minus,
Times,
Divide,
}
impl<'a> TryFrom<&'a str> for Primitive {
type Error = String;
fn try_from(value: &str) -> Result<Self, Self::Error> {
match value {
"+" => Ok(Primitive::Plus),
"-" => Ok(Primitive::Minus),
"*" => Ok(Primitive::Times),
"/" => Ok(Primitive::Divide),
_ => Err(format!("Illegal primitive {}", value)),
}
}
}
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Primitive
where
A: 'a,
D: ?Sized + DocAllocator<'a, A>,
{
fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
match self {
Primitive::Plus => allocator.text("+"),
Primitive::Minus => allocator.text("-"),
Primitive::Times => allocator.text("*"),
Primitive::Divide => allocator.text("/"),
}
}
}
/// An expression that is always either a value or a reference.
///
/// This is the type used to guarantee that we don't nest expressions
/// at this level. Instead, expressions that take arguments take one
/// of these, which can only be a constant or a reference.
#[derive(Debug)]
pub enum ValueOrRef {
Value(Location, Value),
Ref(Location, ArcIntern<String>),
}
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b ValueOrRef
where
A: 'a,
D: ?Sized + DocAllocator<'a, A>,
{
fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
match self {
ValueOrRef::Value(_, v) => v.pretty(allocator),
ValueOrRef::Ref(_, v) => allocator.text(v.as_ref().to_string()),
}
}
}
impl From<ValueOrRef> for Expression {
fn from(value: ValueOrRef) -> Self {
match value {
ValueOrRef::Value(loc, val) => Expression::Value(loc, val),
ValueOrRef::Ref(loc, var) => Expression::Reference(loc, var),
}
}
}
/// A constant in the IR.
#[derive(Debug)]
pub enum Value {
/// A numerical constant.
///
/// The optional argument is the base that was used by the user to input
/// the number. By retaining it, we can ensure that if we need to print the
/// number back out, we can do so in the form that the user entered it.
Number(Option<u8>, i64),
}
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Value
where
A: 'a,
D: ?Sized + DocAllocator<'a, A>,
{
fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
match self {
Value::Number(opt_base, value) => {
let value_str = match opt_base {
None => format!("{}", value),
Some(2) => format!("0b{:b}", value),
Some(8) => format!("0o{:o}", value),
Some(10) => format!("0d{}", value),
Some(16) => format!("0x{:x}", value),
Some(_) => format!("!!{:x}!!", value),
};
allocator.text(value_str)
}
}
}
}