ngr/src/ir/ast.rs

use crate::{
    eval::PrimitiveType,
    syntax::{self, ConstantType, Location},
    util::pretty::{pretty_comma_separated, PrettySymbol},
};
use internment::ArcIntern;
use pretty::{BoxAllocator, DocAllocator, Pretty};
use proptest::{
    prelude::Arbitrary,
    strategy::{BoxedStrategy, Strategy},
};
use std::{fmt, str::FromStr, sync::atomic::AtomicUsize};

/// The name of a variable in the IR, represented as an interned string.
///
/// Interning is expected to make comparisons cheap enough for our purposes,
/// since comparing two of these should end up being pretty much a comparison
/// of the pointers to the underlying strings.
pub type Variable = ArcIntern<String>;

/// Create a fresh symbol derived from `base`.
///
/// The result has the form `base<N>`, where `N` is the next value of a
/// process-wide counter. Because the counter only ever increases, two calls
/// never produce the same generated name (short of the counter wrapping
/// around, which is not a practical concern).
pub fn gensym(base: &str) -> Variable {
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    // Grab-and-bump the counter atomically so concurrent callers get distinct ids.
    let unique_id = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);

    ArcIntern::new(format!("{}<{}>", base, unique_id))
}

/// The representation of a program within our IR. For now, this is exactly one file.
///
/// A program consists of a series of statements and functions. The statements should
/// be executed in order. The functions currently may not reference any variables
/// at the top level, so their order only matters in relation to each other (functions
/// may not be referenced before they are defined).
///
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
/// to print the structure whenever possible, especially if you value your or your
/// user's time. The latter is intended for testing that conversions of `Program` retain
/// their meaning: all `Program`s generated through [`Arbitrary`] are meant to be
/// syntactically valid, although they may contain runtime issues like over- or underflow.
///
/// NOTE(review): the [`Arbitrary`] implementation in this file is currently a stub that
/// calls `unimplemented!()`, so the guarantees above describe the intended behavior.
///
/// The type variable is, somewhat confusingly, the current definition of a type within
/// the IR. Since the makeup of this structure may change over the life of the compiler,
/// it's easiest to just make it an argument.
#[derive(Debug)]
pub struct Program<Type> {
    // For now, a program is just a vector of top-level items (statements and
    // functions). In the future, we'll probably extend this to include a bunch of
    // other information, but for now: just a list.
    pub(crate) items: Vec<TopLevel<Type>>,
}

impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b Program<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
    &'b Type: Pretty<'a, D, A>,
{
    /// Render every top-level item in order, each followed by `;` and a hard
    /// line break. An empty program renders as the empty document.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        self.items.iter().fold(allocator.nil(), |document, item| {
            document
                .append(item.pretty(allocator))
                .append(allocator.text(";"))
                .append(allocator.hardline())
        })
    }
}

/// Property-testing support for [`Program`].
///
/// NOTE: generation is not implemented yet. Calling `arbitrary_with` currently
/// panics via `unimplemented!()`, and the `args` parameter is unused.
impl<Type: core::fmt::Debug> Arbitrary for Program<Type> {
    type Parameters = crate::syntax::arbitrary::GenerationEnvironment;
    type Strategy = BoxedStrategy<Self>;

    fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
        unimplemented!()
        // Sketch of the presumably intended implementation, kept for reference:
        // generate a syntax-level program and run type inference over it.
        //crate::syntax::Program::arbitrary_with(args)
        //    .prop_map(|x| {
        //        x.type_infer()
        //            .expect("arbitrary_with should generate type-correct programs")
        //    })
        //    .boxed()
    }
}

/// A thing that can sit at the top level of a file.
///
/// For the moment, these are statements and functions. Other things
/// will likely be added in the future, but for now: just statements
/// and functions.
#[derive(Debug)]
pub enum TopLevel<Type> {
    /// A bare expression appearing at the top level.
    Statement(Expression<Type>),
    /// A function definition: its name, its parameters (each paired with a
    /// type), a further type (presumably the return type -- TODO confirm), and
    /// the expression forming its body.
    Function(Variable, Vec<(Variable, Type)>, Type, Expression<Type>),
}

impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b TopLevel<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
    &'b Type: Pretty<'a, D, A>,
{
    /// Render a top-level item.
    ///
    /// Statements are rendered as their expression. Functions are rendered as
    /// `function NAME (ARG, ...) BODY`; parameter types and the function's own
    /// type field are not printed.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            TopLevel::Statement(stmt) => stmt.pretty(allocator),

            TopLevel::Function(name, args, _, expr) => {
                // Only the parameter names are shown, wrapped in parentheses.
                let parameters = pretty_comma_separated(
                    allocator,
                    &args.iter().map(|(x, _)| PrettySymbol::from(x)).collect(),
                )
                .parens();

                allocator
                    .text("function")
                    .append(allocator.space())
                    .append(allocator.text(name.as_ref().to_string()))
                    .append(allocator.space())
                    .append(parameters)
                    .append(allocator.space())
                    .append(expr.pretty(allocator))
            }
        }
    }
}

/// The representation of an expression.
///
/// Note that expressions, like everything else in this syntax tree,
/// support [`Pretty`], and it's strongly encouraged that you use
/// that trait/module when printing these structures.
///
/// Also, the operands of expressions at this point in the compiler are
/// explicitly defined so that they are *not* recursive. By this point, if an
/// expression requires some other data (like, for example, invoking
/// a primitive), any subexpressions have been bound to variables so
/// that the referenced data will always either be a constant or a
/// variable reference (a [`ValueOrRef`]). The `Block` and `Bind` variants
/// are the exceptions: they do hold nested expressions.
#[derive(Debug)]
pub enum Expression<Type> {
    /// A constant or a variable reference standing on its own.
    Atomic(ValueOrRef<Type>),
    /// A cast of the given atom; the `Type` field is reported as the
    /// expression's type and is printed as the cast target.
    Cast(Location, Type, ValueOrRef<Type>),
    /// A primitive operation applied to atomic operands, with its result type.
    Primitive(Location, Type, Primitive, Vec<ValueOrRef<Type>>),
    /// A sequence of expressions, with the type of the block as a whole.
    Block(Location, Type, Vec<Expression<Type>>),
    /// Print the named variable.
    Print(Location, Variable),
    /// Bind the result of the boxed expression to the named variable. The
    /// `Type` field is presumably the type of the bound variable -- TODO confirm.
    Bind(Location, Variable, Type, Box<Expression<Type>>),
}

impl<Type: Clone + TypeWithVoid> Expression<Type> {
    /// Return the type of the expression, as inferred or recently computed.
    ///
    /// The stored type is cloned for every variant that carries one describing
    /// its result. `Print` and `Bind` expressions always report the void type.
    pub fn type_of(&self) -> Type {
        match self {
            Expression::Print(_, _) | Expression::Bind(_, _, _, _) => Type::void(),

            Expression::Atomic(ValueOrRef::Ref(_, ty, _))
            | Expression::Atomic(ValueOrRef::Value(_, ty, _))
            | Expression::Cast(_, ty, _)
            | Expression::Primitive(_, ty, _, _)
            | Expression::Block(_, ty, _) => ty.clone(),
        }
    }

    /// Return a reference to the source location attached to the expression.
    pub fn location(&self) -> &Location {
        // Every variant stores its location in the first position.
        match self {
            Expression::Atomic(ValueOrRef::Ref(loc, _, _))
            | Expression::Atomic(ValueOrRef::Value(loc, _, _))
            | Expression::Cast(loc, _, _)
            | Expression::Primitive(loc, _, _, _)
            | Expression::Block(loc, _, _)
            | Expression::Print(loc, _)
            | Expression::Bind(loc, _, _, _) => loc,
        }
    }
}

impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b Expression<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
    &'b Type: Pretty<'a, D, A>,
{
    /// Render an expression.
    ///
    /// * atoms render as themselves;
    /// * casts render as `<TYPE>VALUE`;
    /// * unary primitives render as `OPx`, binary ones as `(x OP y)`, and any
    ///   other arity as a `!!...!!` diagnostic marker;
    /// * blocks render as `()` when empty, as the sole expression when they
    ///   hold exactly one, and otherwise as a braced, `;`-separated sequence;
    /// * `print` and bindings render as `print x` and `x = EXPR`.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            Expression::Atomic(atom) => atom.pretty(allocator),

            Expression::Cast(_, target, operand) => allocator
                .text("<")
                .append(target.pretty(allocator))
                .append(allocator.text(">"))
                .append(operand.pretty(allocator)),

            Expression::Primitive(_, _, op, exprs) => match exprs.as_slice() {
                [operand] => op.pretty(allocator).append(operand.pretty(allocator)),

                [lhs, rhs] => lhs
                    .pretty(allocator)
                    .append(allocator.space())
                    .append(op.pretty(allocator))
                    .append(allocator.space())
                    .append(rhs.pretty(allocator))
                    .parens(),

                // Neither unary nor binary: emit a loud marker instead of panicking.
                operands => allocator.text(format!(
                    "!!{:?} with {} arguments!!",
                    op,
                    operands.len()
                )),
            },

            Expression::Block(_, _, exprs) => match exprs.as_slice() {
                [] => allocator.text("()"),

                [only] => only.pretty(allocator),

                [leading @ .., last] => {
                    let opening = allocator.text("{").append(allocator.hardline());

                    // Every expression but the last gets a trailing semicolon.
                    let body = leading.iter().fold(opening, |document, stmt| {
                        document
                            .append(stmt.pretty(allocator))
                            .append(allocator.text(";"))
                            .append(allocator.hardline())
                    });

                    body.append(last.pretty(allocator))
                        .append(allocator.hardline())
                        .append(allocator.text("}"))
                }
            },

            Expression::Print(_, name) => allocator
                .text("print")
                .append(allocator.space())
                .append(allocator.text(name.as_ref().to_string())),

            Expression::Bind(_, name, _, value) => allocator
                .text(name.as_ref().to_string())
                .append(allocator.space())
                .append(allocator.text("="))
                .append(allocator.space())
                .append(value.pretty(allocator)),
        }
    }
}

/// The primitive operations available in the language.
///
/// Modelling these as an enumeration rules out a lot of "this should not
/// happen" cases, at the possible cost of becoming cumbersome later. Should
/// that happen, this may grow into a more hierarchical enumeration, or we
/// will simply live with the "this should not happen" cases.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Primitive {
    Plus,
    Minus,
    Times,
    Divide,
}

impl FromStr for Primitive {
    type Err = String;

    /// Parse an operator symbol (`+`, `-`, `*` or `/`) into a [`Primitive`].
    ///
    /// Any other input yields an `Err` whose message names the offending text.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let recognized = match value {
            "+" => Some(Primitive::Plus),
            "-" => Some(Primitive::Minus),
            "*" => Some(Primitive::Times),
            "/" => Some(Primitive::Divide),
            _ => None,
        };

        recognized.ok_or_else(|| format!("Illegal primitive {}", value))
    }
}

impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Primitive
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    /// Render the primitive as its operator symbol.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        let symbol = match self {
            Primitive::Plus => "+",
            Primitive::Minus => "-",
            Primitive::Times => "*",
            Primitive::Divide => "/",
        };

        allocator.text(symbol)
    }
}

impl fmt::Display for Primitive {
    /// Format the primitive by rendering its [`Pretty`] document at a width of
    /// 72 columns.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let allocator = BoxAllocator;
        let document = <&Primitive as Pretty<'_, BoxAllocator, ()>>::pretty(self, &allocator);

        document.render_fmt(72, f)
    }
}

/// An expression that is always either a value or a reference.
///
/// This is the type used to guarantee that we don't nest expressions
/// at this level. Instead, expressions that take arguments take one
/// of these, which can only be a constant or a reference.
#[derive(Clone, Debug)]
pub enum ValueOrRef<Type> {
    /// A constant, along with its source location and type.
    Value(Location, Type, Value),
    /// A reference to a named variable, along with its source location and type.
    Ref(Location, Type, ArcIntern<String>),
}

impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b ValueOrRef<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    /// Render a constant via its own pretty-printer, or a reference as the
    /// bare variable name. Locations and types are not printed.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            ValueOrRef::Ref(_, _, name) => {
                let text: &String = name.as_ref();
                allocator.text(text.clone())
            }
            ValueOrRef::Value(_, _, constant) => constant.pretty(allocator),
        }
    }
}

impl<Type> From<ValueOrRef<Type>> for Expression<Type> {
    fn from(value: ValueOrRef<Type>) -> Self {
        Expression::Atomic(value)
    }
}

/// A constant in the IR.
///
/// Each variant holds a number of the corresponding fixed-width integer type.
///
/// The optional first field in each variant is the base that was used by the
/// user to input the number. By retaining it, we can ensure that if we need
/// to print the number back out, we can do so in the form that the user
/// entered it.
#[derive(Clone, Debug)]
pub enum Value {
    I8(Option<u8>, i8),
    I16(Option<u8>, i16),
    I32(Option<u8>, i32),
    I64(Option<u8>, i64),
    U8(Option<u8>, u8),
    U16(Option<u8>, u16),
    U32(Option<u8>, u32),
    U64(Option<u8>, u64),
}

impl Value {
    /// Return the IR type of this constant: always the primitive type that
    /// matches the variant.
    pub fn type_of(&self) -> Type {
        let primitive = match self {
            Value::I8(..) => PrimitiveType::I8,
            Value::I16(..) => PrimitiveType::I16,
            Value::I32(..) => PrimitiveType::I32,
            Value::I64(..) => PrimitiveType::I64,
            Value::U8(..) => PrimitiveType::U8,
            Value::U16(..) => PrimitiveType::U16,
            Value::U32(..) => PrimitiveType::U32,
            Value::U64(..) => PrimitiveType::U64,
        };

        Type::Primitive(primitive)
    }
}

impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Value
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    /// Render a constant by delegating to the pretty-printer of
    /// `syntax::Value::Number`, passing along the base the user wrote the
    /// number in (if recorded) and the constant's type.
    ///
    /// Signed constants are rendered as their magnitude, preceded by `-` only
    /// when the value is actually negative.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        // Render a non-negative magnitude together with its base and type.
        let pretty_internal = |opt_base: &Option<u8>, x, t| {
            syntax::Value::Number(*opt_base, Some(t), x).pretty(allocator)
        };

        // Render a signed value. `unsigned_abs` is used for the magnitude so
        // that `i64::MIN` does not overflow.
        let pretty_internal_signed = |opt_base, x: i64, t| {
            let magnitude = pretty_internal(opt_base, x.unsigned_abs(), t);

            // Only negative values get a sign; zero and positive values must
            // print as-is rather than being turned into negative literals.
            if x < 0 {
                allocator.text("-").append(magnitude)
            } else {
                magnitude
            }
        };

        match self {
            Value::I8(opt_base, value) => {
                pretty_internal_signed(opt_base, *value as i64, ConstantType::I8)
            }
            Value::I16(opt_base, value) => {
                pretty_internal_signed(opt_base, *value as i64, ConstantType::I16)
            }
            Value::I32(opt_base, value) => {
                pretty_internal_signed(opt_base, *value as i64, ConstantType::I32)
            }
            Value::I64(opt_base, value) => {
                pretty_internal_signed(opt_base, *value, ConstantType::I64)
            }
            Value::U8(opt_base, value) => {
                pretty_internal(opt_base, *value as u64, ConstantType::U8)
            }
            Value::U16(opt_base, value) => {
                pretty_internal(opt_base, *value as u64, ConstantType::U16)
            }
            Value::U32(opt_base, value) => {
                pretty_internal(opt_base, *value as u64, ConstantType::U32)
            }
            Value::U64(opt_base, value) => pretty_internal(opt_base, *value, ConstantType::U64),
        }
    }
}

/// A type in the IR that contains no type variables (contrast with
/// [`TypeOrVar`], which additionally allows them).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Type {
    /// One of the built-in primitive types.
    Primitive(PrimitiveType),
    /// A function type: the argument types, followed by the return type.
    Function(Vec<Type>, Box<Type>),
}

impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Type
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    /// Render a primitive type by its `Display` text, and a function type as
    /// `(ARG, ...) -> RETURN`.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            Type::Primitive(pt) => allocator.text(pt.to_string()),

            Type::Function(args, rettype) => {
                let parameters =
                    pretty_comma_separated(allocator, &args.iter().collect()).parens();

                parameters
                    .append(allocator.space())
                    .append(allocator.text("->"))
                    .append(allocator.space())
                    .append(rettype.pretty(allocator))
            }
        }
    }
}

impl fmt::Display for Type {
    /// Format the type compactly.
    ///
    /// Primitive types are formatted via their own `Display` implementation.
    /// Function types are formatted as `(ARG,ARG,...)->RETURN`, with the
    /// argument list enclosed in a balanced pair of parentheses.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The calls below are fully qualified because this file only imports
        // the `fmt` module, not the `Display` trait itself.
        match self {
            Type::Primitive(pt) => fmt::Display::fmt(pt, f),
            Type::Function(args, ret) => {
                write!(f, "(")?;
                for (index, arg) in args.iter().enumerate() {
                    // Separator goes between arguments, never after the last.
                    if index > 0 {
                        write!(f, ",")?;
                    }
                    fmt::Display::fmt(arg, f)?;
                }
                // Close the argument list before the arrow.
                write!(f, ")->")?;
                fmt::Display::fmt(&**ret, f)
            }
        }
    }
}

impl From<PrimitiveType> for Type {
    fn from(value: PrimitiveType) -> Self {
        Type::Primitive(value)
    }
}

/// A type that may contain type variables.
///
/// This mirrors [`Type`], with the addition of the `Variable` case.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum TypeOrVar {
    /// One of the built-in primitive types.
    Primitive(PrimitiveType),
    /// A named type variable, along with the location it is associated with.
    Variable(Location, ArcIntern<String>),
    /// A function type: the argument types, followed by the return type.
    Function(Vec<TypeOrVar>, Box<TypeOrVar>),
}

impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b TypeOrVar
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    /// Render a primitive type by its `Display` text, a type variable by its
    /// name, and a function type as `(ARG, ...) -> RETURN`.
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            TypeOrVar::Primitive(primitive) => allocator.text(primitive.to_string()),

            TypeOrVar::Variable(_, name) => allocator.text(name.to_string()),

            TypeOrVar::Function(args, rettype) => {
                let parameters =
                    pretty_comma_separated(allocator, &args.iter().collect()).parens();

                parameters
                    .append(allocator.space())
                    .append(allocator.text("->"))
                    .append(allocator.space())
                    .append(rettype.pretty(allocator))
            }
        }
    }
}

impl fmt::Display for TypeOrVar {
    /// Format the type compactly.
    ///
    /// Primitive types are formatted via their own `Display` implementation and
    /// type variables as their name. Function types are formatted as
    /// `<function:(ARG, ARG, ...)->RETURN>`.
    ///
    /// Any error reported by the underlying formatter is propagated.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The `Display::fmt` calls below are fully qualified because this file
        // only imports the `fmt` module, not the `Display` trait itself.
        match self {
            TypeOrVar::Primitive(x) => fmt::Display::fmt(x, f),
            TypeOrVar::Variable(_, v) => write!(f, "{}", v),
            TypeOrVar::Function(args, rettype) => {
                write!(f, "<function:(")?;
                for (index, arg) in args.iter().enumerate() {
                    // Separator goes between arguments, never after the last.
                    if index > 0 {
                        write!(f, ", ")?;
                    }
                    write!(f, "{}", arg)?;
                }
                write!(f, ")->")?;
                fmt::Display::fmt(&**rettype, f)?;
                write!(f, ">")
            }
        }
    }
}

impl TypeOrVar {
    /// Create a fresh type variable, distinct from every type variable created
    /// before it.
    ///
    /// The variable's name comes from [`gensym`], so it is unique for the
    /// lifetime of the process (barring an overflow of the underlying counter,
    /// which you would have to try very hard to trigger). The location attached
    /// to the variable is manufactured; use [`TypeOrVar::new_located`] to
    /// supply a real one.
    pub fn new() -> Self {
        let location = Location::manufactured();

        Self::new_located(location)
    }

    /// Create a fresh type variable, distinct from every type variable created
    /// before it, and associate it with the given location.
    ///
    /// The variable's name comes from [`gensym`], so it is unique for the
    /// lifetime of the process (barring an overflow of the underlying counter,
    /// which you would have to try very hard to trigger).
    pub fn new_located(loc: Location) -> Self {
        let fresh_name = gensym("t");

        Self::Variable(loc, fresh_name)
    }
}

/// A type representation that can express the "void" type.
///
/// This trait is public because it appears as a bound on the public
/// `impl` block of [`Expression`]; keeping it private would make that
/// bound a private item in a public interface.
pub trait TypeWithVoid {
    /// Construct the value that represents the void type.
    fn void() -> Self;
}

impl TypeWithVoid for Type {
    /// The void type, expressed as a primitive [`Type`].
    fn void() -> Self {
        Self::Primitive(PrimitiveType::Void)
    }
}

impl TypeWithVoid for TypeOrVar {
    /// The void type, expressed as a primitive [`TypeOrVar`].
    fn void() -> Self {
        Self::Primitive(PrimitiveType::Void)
    }
}

//impl From<Type> for TypeOrVar {
//    fn from(value: Type) -> Self {
//        TypeOrVar::Type(value)
//    }
//}

impl<T: Into<Type>> From<T> for TypeOrVar {
    /// Convert anything that can become a [`Type`] into the equivalent
    /// [`TypeOrVar`], converting function argument and return types
    /// recursively. The result never contains type variables.
    fn from(value: T) -> Self {
        let resolved: Type = value.into();

        match resolved {
            Type::Primitive(p) => Self::Primitive(p),
            Type::Function(args, ret) => {
                let converted_args: Vec<TypeOrVar> = args.into_iter().map(Into::into).collect();
                let converted_ret: Box<TypeOrVar> = Box::new((*ret).into());

                Self::Function(converted_args, converted_ret)
            }
        }
    }
}