use crate::{
    eval::PrimitiveType,
    syntax::{self, ConstantType, Location},
    util::pretty::{pretty_comma_separated, PrettySymbol},
};
use internment::ArcIntern;
use pretty::{BoxAllocator, DocAllocator, Pretty};
use proptest::{
    prelude::Arbitrary,
    strategy::{BoxedStrategy, Strategy},
};
use std::{fmt, str::FromStr, sync::atomic::AtomicUsize};

/// We're going to represent variables as interned strings.
///
/// These should be fast enough for comparison that it's OK, since it's going to end up
/// being pretty much the pointer to the string.
pub type Variable = ArcIntern<String>;

/// Generate a new symbol that is different from every other symbol previously
/// produced by this function.
///
/// The provided string is used as a base name for the symbol, and is extended with
/// the current value of a process-wide counter, giving names of the form `base<N>`.
/// While technically you could roll-over the counter, you probably don't need to
/// worry about it.
pub fn gensym(base: &str) -> Variable {
    // One counter shared by every caller, so two calls never observe the same number.
    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    let unique = COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
    ArcIntern::new(format!("{}<{}>", base, unique))
}

/// The representation of a program within our IR. For now, this is exactly one file.
///
/// A program consists of a series of statements and functions. The statements should
/// be executed in order. The functions currently may not reference any variables
/// at the top level, so their order only matters in relation to each other (functions
/// may not be referenced before they are defined).
///
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
/// to print the structure whenever possible, especially if you value your or your
/// user's time. The latter is useful for testing that conversions of `Program` retain
/// their meaning. All `Program`s generated through [`Arbitrary`] are guaranteed to be
/// syntactically valid, although they may contain runtime issues like over- or underflow.
///
/// The type variable is, somewhat confusingly, the current definition of a type within
/// the IR. Since the makeup of this structure may change over the life of the compiler,
/// it's easiest to just make it an argument.
#[derive(Debug)]
pub struct Program<Type> {
    // For now, a program is just a vector of top-level items. In the future, we'll
    // probably extend this to include a bunch of other information, but for now:
    // just a list.
    pub(crate) items: Vec<TopLevel<Type>>,
}

/// Renders every top-level item in order, each followed by `;` and a hard line break.
impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b Program<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
    &'b Type: Pretty<'a, D, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        // Start from the empty document and append one rendered item at a time.
        self.items.iter().fold(allocator.nil(), |doc, item| {
            doc.append(item.pretty(allocator))
                .append(allocator.text(";"))
                .append(allocator.hardline())
        })
    }
}

impl Arbitrary for Program<Type> {
    type Parameters = crate::syntax::arbitrary::GenerationEnvironment;
    type Strategy = BoxedStrategy<Self>;

    /// Not implemented yet.
    ///
    /// # Panics
    ///
    /// Always panics via `unimplemented!()`.
    fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
        // TODO: restore the intended implementation once type inference can be
        // driven from here:
        //
        //   crate::syntax::Program::arbitrary_with(_args)
        //       .prop_map(|x| {
        //           x.type_infer()
        //               .expect("arbitrary_with should generate type-correct programs")
        //       })
        //       .boxed()
        unimplemented!()
    }
}

/// A thing that can sit at the top level of a file.
///
/// For the moment, these are statements and functions.
Other things /// will likely be added in the future, but for now: just statements /// and functions #[derive(Debug)] pub enum TopLevel { Statement(Expression), Function(Variable, Vec<(Variable, Type)>, Type, Expression), } impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b TopLevel where A: 'a, D: ?Sized + DocAllocator<'a, A>, &'b Type: Pretty<'a, D, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { match self { TopLevel::Function(name, args, _, expr) => allocator .text("function") .append(allocator.space()) .append(allocator.text(name.as_ref().to_string())) .append(allocator.space()) .append( pretty_comma_separated( allocator, &args.iter().map(|(x, _)| PrettySymbol::from(x)).collect(), ) .parens(), ) .append(allocator.space()) .append(expr.pretty(allocator)), TopLevel::Statement(stmt) => stmt.pretty(allocator), } } } /// The representation of an expression. /// /// Note that expressions, like everything else in this syntax tree, /// supports [`Pretty`], and it's strongly encouraged that you use /// that trait/module when printing these structures. /// /// Also, Expressions at this point in the compiler are explicitly /// defined so that they are *not* recursive. By this point, if an /// expression requires some other data (like, for example, invoking /// a primitive), any subexpressions have been bound to variables so /// that the referenced data will always either be a constant or a /// variable reference. #[derive(Debug)] pub enum Expression { Atomic(ValueOrRef), Cast(Location, Type, ValueOrRef), Primitive(Location, Type, Primitive, Vec>), Block(Location, Type, Vec>), Print(Location, Variable), Bind(Location, Variable, Type, Box>), } impl Expression { /// Return a reference to the type of the expression, as inferred or recently /// computed. 
pub fn type_of(&self) -> Type { match self { Expression::Atomic(ValueOrRef::Ref(_, t, _)) => t.clone(), Expression::Atomic(ValueOrRef::Value(_, t, _)) => t.clone(), Expression::Cast(_, t, _) => t.clone(), Expression::Primitive(_, t, _, _) => t.clone(), Expression::Block(_, t, _) => t.clone(), Expression::Print(_, _) => Type::void(), Expression::Bind(_, _, _, _) => Type::void(), } } /// Return a reference to the location associated with the expression. pub fn location(&self) -> &Location { match self { Expression::Atomic(ValueOrRef::Ref(l, _, _)) => l, Expression::Atomic(ValueOrRef::Value(l, _, _)) => l, Expression::Cast(l, _, _) => l, Expression::Primitive(l, _, _, _) => l, Expression::Block(l, _, _) => l, Expression::Print(l, _) => l, Expression::Bind(l, _, _, _) => l, } } } impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b Expression where A: 'a, D: ?Sized + DocAllocator<'a, A>, &'b Type: Pretty<'a, D, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { match self { Expression::Atomic(x) => x.pretty(allocator), Expression::Cast(_, t, e) => allocator .text("<") .append(t.pretty(allocator)) .append(allocator.text(">")) .append(e.pretty(allocator)), Expression::Primitive(_, _, op, exprs) if exprs.len() == 1 => { op.pretty(allocator).append(exprs[0].pretty(allocator)) } Expression::Primitive(_, _, op, exprs) if exprs.len() == 2 => { let left = exprs[0].pretty(allocator); let right = exprs[1].pretty(allocator); left.append(allocator.space()) .append(op.pretty(allocator)) .append(allocator.space()) .append(right) .parens() } Expression::Primitive(_, _, op, exprs) => { allocator.text(format!("!!{:?} with {} arguments!!", op, exprs.len())) } Expression::Block(_, _, exprs) => match exprs.split_last() { None => allocator.text("()"), Some((last, &[])) => last.pretty(allocator), Some((last, start)) => { let mut result = allocator.text("{").append(allocator.hardline()); for stmt in start.iter() { result = result .append(stmt.pretty(allocator)) 
.append(allocator.text(";")) .append(allocator.hardline()); } result .append(last.pretty(allocator)) .append(allocator.hardline()) .append(allocator.text("}")) } }, Expression::Print(_, var) => allocator .text("print") .append(allocator.space()) .append(allocator.text(var.as_ref().to_string())), Expression::Bind(_, var, _, expr) => allocator .text(var.as_ref().to_string()) .append(allocator.space()) .append(allocator.text("=")) .append(allocator.space()) .append(expr.pretty(allocator)), } } } /// A type representing the primitives allowed in the language. /// /// Having this as an enumeration avoids a lot of "this should not happen" /// cases, but might prove to be cumbersome in the future. If that happens, /// this may either become a more hierarchical enumeration, or we'll just /// deal with the "this should not happen" cases. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Primitive { Plus, Minus, Times, Divide, } impl FromStr for Primitive { type Err = String; fn from_str(value: &str) -> Result { match value { "+" => Ok(Primitive::Plus), "-" => Ok(Primitive::Minus), "*" => Ok(Primitive::Times), "/" => Ok(Primitive::Divide), _ => Err(format!("Illegal primitive {}", value)), } } } impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Primitive where A: 'a, D: ?Sized + DocAllocator<'a, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { match self { Primitive::Plus => allocator.text("+"), Primitive::Minus => allocator.text("-"), Primitive::Times => allocator.text("*"), Primitive::Divide => allocator.text("/"), } } } impl fmt::Display for Primitive { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { <&Primitive as Pretty<'_, BoxAllocator, ()>>::pretty(self, &BoxAllocator).render_fmt(72, f) } } /// An expression that is always either a value or a reference. /// /// This is the type used to guarantee that we don't nest expressions /// at this level. 
Instead, expressions that take arguments take one /// of these, which can only be a constant or a reference. #[derive(Clone, Debug)] pub enum ValueOrRef { Value(Location, Type, Value), Ref(Location, Type, ArcIntern), } impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b ValueOrRef where A: 'a, D: ?Sized + DocAllocator<'a, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { match self { ValueOrRef::Value(_, _, v) => v.pretty(allocator), ValueOrRef::Ref(_, _, v) => allocator.text(v.as_ref().to_string()), } } } impl From> for Expression { fn from(value: ValueOrRef) -> Self { Expression::Atomic(value) } } /// A constant in the IR. /// /// The optional argument in numeric types is the base that was used by the /// user to input the number. By retaining it, we can ensure that if we need /// to print the number back out, we can do so in the form that the user /// entered it. #[derive(Clone, Debug)] pub enum Value { I8(Option, i8), I16(Option, i16), I32(Option, i32), I64(Option, i64), U8(Option, u8), U16(Option, u16), U32(Option, u32), U64(Option, u64), } impl Value { /// Return the type described by this value pub fn type_of(&self) -> Type { match self { Value::I8(_, _) => Type::Primitive(PrimitiveType::I8), Value::I16(_, _) => Type::Primitive(PrimitiveType::I16), Value::I32(_, _) => Type::Primitive(PrimitiveType::I32), Value::I64(_, _) => Type::Primitive(PrimitiveType::I64), Value::U8(_, _) => Type::Primitive(PrimitiveType::U8), Value::U16(_, _) => Type::Primitive(PrimitiveType::U16), Value::U32(_, _) => Type::Primitive(PrimitiveType::U32), Value::U64(_, _) => Type::Primitive(PrimitiveType::U64), } } } impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Value where A: 'a, D: ?Sized + DocAllocator<'a, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { let pretty_internal = |opt_base: &Option, x, t| { syntax::Value::Number(*opt_base, Some(t), x).pretty(allocator) }; let pretty_internal_signed = |opt_base, x: i64, t| { let base = 
pretty_internal(opt_base, x.unsigned_abs(), t); allocator.text("-").append(base) }; match self { Value::I8(opt_base, value) => { pretty_internal_signed(opt_base, *value as i64, ConstantType::I8) } Value::I16(opt_base, value) => { pretty_internal_signed(opt_base, *value as i64, ConstantType::I16) } Value::I32(opt_base, value) => { pretty_internal_signed(opt_base, *value as i64, ConstantType::I32) } Value::I64(opt_base, value) => { pretty_internal_signed(opt_base, *value, ConstantType::I64) } Value::U8(opt_base, value) => { pretty_internal(opt_base, *value as u64, ConstantType::U8) } Value::U16(opt_base, value) => { pretty_internal(opt_base, *value as u64, ConstantType::U16) } Value::U32(opt_base, value) => { pretty_internal(opt_base, *value as u64, ConstantType::U32) } Value::U64(opt_base, value) => pretty_internal(opt_base, *value, ConstantType::U64), } } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum Type { Primitive(PrimitiveType), Function(Vec, Box), } impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Type where A: 'a, D: ?Sized + DocAllocator<'a, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { match self { Type::Primitive(pt) => allocator.text(format!("{}", pt)), Type::Function(args, rettype) => { pretty_comma_separated(allocator, &args.iter().collect()) .parens() .append(allocator.space()) .append(allocator.text("->")) .append(allocator.space()) .append(rettype.pretty(allocator)) } } } } impl fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Type::Primitive(pt) => pt.fmt(f), Type::Function(args, ret) => { write!(f, "(")?; let mut argiter = args.iter().peekable(); while let Some(arg) = argiter.next() { arg.fmt(f)?; if argiter.peek().is_some() { write!(f, ",")?; } } write!(f, "->")?; ret.fmt(f) } } } } impl From for Type { fn from(value: PrimitiveType) -> Self { Type::Primitive(value) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum TypeOrVar { Primitive(PrimitiveType), Variable(Location, 
ArcIntern), Function(Vec, Box), } impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b TypeOrVar where A: 'a, D: ?Sized + DocAllocator<'a, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { match self { TypeOrVar::Primitive(x) => allocator.text(format!("{}", x)), TypeOrVar::Variable(_, x) => allocator.text(x.to_string()), TypeOrVar::Function(args, rettype) => { pretty_comma_separated(allocator, &args.iter().collect()) .parens() .append(allocator.space()) .append(allocator.text("->")) .append(allocator.space()) .append(rettype.pretty(allocator)) } } } } impl fmt::Display for TypeOrVar { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { TypeOrVar::Primitive(x) => x.fmt(f), TypeOrVar::Variable(_, v) => write!(f, "{}", v), TypeOrVar::Function(args, rettype) => { write!(f, " write!(f, "()")?, Some((single, &[])) => { write!(f, "({})", single)?; } Some((last_one, rest)) => { write!(f, "(")?; for arg in rest.iter() { write!(f, "{}, ", arg); } write!(f, "{})", last_one)?; } } write!(f, "->")?; rettype.fmt(f)?; write!(f, ">") } } } } impl TypeOrVar { /// Generate a fresh type variable that is different from all previous type variables. /// /// This type variable is guaranteed to be unique across the process lifetime. Overuse /// of this function could potentially cause overflow problems, but you're going to have /// to try really hard (like, 2^64 times) to make that happen. The location bound to /// this address will be purely manufactured; if you want to specify a location, use /// [`TypeOrVar::new_located`]. pub fn new() -> Self { Self::new_located(Location::manufactured()) } /// Generate a fresh type variable that is different from all previous type variables. /// /// This type variable is guaranteed to be unique across the process lifetime. Overuse /// of this function could potentially cause overflow problems, but you're going to have /// to try really hard (like, 2^64 times) to make that happen. 
pub fn new_located(loc: Location) -> Self { TypeOrVar::Variable(loc, gensym("t")) } } trait TypeWithVoid { fn void() -> Self; } impl TypeWithVoid for Type { fn void() -> Self { Type::Primitive(PrimitiveType::Void) } } impl TypeWithVoid for TypeOrVar { fn void() -> Self { TypeOrVar::Primitive(PrimitiveType::Void) } } //impl From for TypeOrVar { // fn from(value: Type) -> Self { // TypeOrVar::Type(value) // } //} impl> From for TypeOrVar { fn from(value: T) -> Self { match value.into() { Type::Primitive(p) => TypeOrVar::Primitive(p), Type::Function(args, ret) => TypeOrVar::Function( args.into_iter().map(Into::into).collect(), Box::new((*ret).into()), ), } } }