Files
ngr/src/ir/ast.rs
2023-12-02 22:38:44 -08:00

568 lines
20 KiB
Rust

use crate::{
eval::PrimitiveType,
syntax::{self, ConstantType, Location},
util::pretty::{pretty_comma_separated, PrettySymbol},
};
use internment::ArcIntern;
use pretty::{BoxAllocator, DocAllocator, Pretty};
use proptest::{
prelude::Arbitrary,
strategy::{BoxedStrategy, Strategy},
};
use std::{fmt, str::FromStr, sync::atomic::AtomicUsize};
/// The name of a variable (or other symbol) in the IR, stored as an interned string.
///
/// Interning means equal names share one handle, so comparing two `Variable`s
/// should be fast enough for our purposes: it ends up being pretty much a
/// comparison of the pointers to the underlying strings.
pub type Variable = ArcIntern<String>;
/// Create a fresh symbol that differs from every symbol produced by an earlier
/// call to this function.
///
/// The result has the form `base<N>`, where `base` is the caller-supplied name
/// and `N` is drawn from a process-wide counter that is bumped on every call.
/// The counter is a `usize`, so it could in principle wrap around, but in
/// practice that is not something to worry about.
pub fn gensym(base: &str) -> Variable {
    use std::sync::atomic::Ordering;

    static COUNTER: AtomicUsize = AtomicUsize::new(0);

    // Grab this call's serial number and advance the counter for the next one.
    let serial = COUNTER.fetch_add(1, Ordering::SeqCst);
    let name = format!("{}<{}>", base, serial);
    ArcIntern::new(name)
}
/// The representation of a program within our IR. For now, this is exactly one file.
///
/// A program consists of a series of statements and functions. The statements should
/// be executed in order. The functions currently may not reference any variables
/// at the top level, so their order only matters in relation to each other (functions
/// may not be referenced before they are defined).
///
/// `Program` implements both [`Pretty`] and [`Arbitrary`]. The former should be used
/// to print the structure whenever possible, especially if you value your or your
/// user's time. The latter is useful for testing that conversions of `Program` retain
/// their meaning. All `Program`s generated through [`Arbitrary`] are intended to be
/// syntactically valid, although they may contain runtime issues like over- or underflow.
/// (Note: the [`Arbitrary`] implementation is currently a stub that panics when used.)
///
/// The type variable is, somewhat confusingly, the current definition of a type within
/// the IR. Since the makeup of this structure may change over the life of the compiler,
/// it's easiest to just make it an argument.
#[derive(Debug)]
pub struct Program<Type> {
// For now, a program is just a vector of top-level items (statements and
// functions). In the future, we'll probably extend this to include a bunch of
// other information, but for now: just a list.
pub(crate) items: Vec<TopLevel<Type>>,
}
/// Pretty-prints a program as its top-level items in order, each one followed
/// by a `;` and a hard line break. An empty program renders as nothing.
impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b Program<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
    &'b Type: Pretty<'a, D, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        // Start from the empty document and tack each item onto the end.
        self.items.iter().fold(allocator.nil(), |doc, item| {
            doc.append(item.pretty(allocator))
                .append(allocator.text(";"))
                .append(allocator.hardline())
        })
    }
}
/// Placeholder [`Arbitrary`] implementation for [`Program`].
///
/// The generation strategy has not been written yet: calling `arbitrary_with`
/// currently panics via `unimplemented!()`.
impl<Type: core::fmt::Debug> Arbitrary for Program<Type> {
type Parameters = crate::syntax::arbitrary::GenerationEnvironment;
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
// Not implemented yet. The commented-out code below sketches the apparent
// plan: generate a surface-syntax program and run type inference on it.
unimplemented!()
//crate::syntax::Program::arbitrary_with(args)
// .prop_map(|x| {
// x.type_infer()
// .expect("arbitrary_with should generate type-correct programs")
// })
// .boxed()
}
}
/// A thing that can sit at the top level of a file.
///
/// For the moment, these are statements and functions. Other things
/// will likely be added in the future, but for now: just statements
/// and functions.
#[derive(Debug)]
pub enum TopLevel<Type> {
/// A top-level expression, used as a statement.
Statement(Expression<Type>),
/// A function definition: its name, its parameters (each a name paired with
/// a type), a further type (presumably the return type -- TODO confirm), and
/// the expression that forms its body.
Function(Variable, Vec<(Variable, Type)>, Type, Expression<Type>),
}
/// Pretty-prints a top-level item.
///
/// Statements print as their expression. Functions print as
/// `function NAME (ARG, ARG, ...) BODY`; parameter types and the third
/// `Type` field are not printed.
impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b TopLevel<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
    &'b Type: Pretty<'a, D, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            TopLevel::Statement(statement) => statement.pretty(allocator),

            TopLevel::Function(name, args, _, body) => {
                // Only the parameter names are shown; their types are dropped.
                let parameters = pretty_comma_separated(
                    allocator,
                    &args
                        .iter()
                        .map(|(arg, _)| PrettySymbol::from(arg))
                        .collect(),
                )
                .parens();

                allocator
                    .text("function")
                    .append(allocator.space())
                    .append(allocator.text(name.as_ref().to_string()))
                    .append(allocator.space())
                    .append(parameters)
                    .append(allocator.space())
                    .append(body.pretty(allocator))
            }
        }
    }
}
/// The representation of an expression.
///
/// Note that expressions, like everything else in this syntax tree,
/// support [`Pretty`], and it's strongly encouraged that you use
/// that trait/module when printing these structures.
///
/// Also, Expressions at this point in the compiler are explicitly
/// defined so that their *operands* are not recursive. By this point, if an
/// expression requires some other data (like, for example, invoking
/// a primitive), any subexpressions have been bound to variables so
/// that the referenced data will always either be a constant or a
/// variable reference. (`Block` and `Bind` do still hold nested expressions.)
#[derive(Debug)]
pub enum Expression<Type> {
/// A bare constant or variable reference.
Atomic(ValueOrRef<Type>),
/// A cast of a constant or variable reference; pretty-printed as `<type>value`.
Cast(Location, Type, ValueOrRef<Type>),
/// A primitive operation applied to constant/reference operands, along with
/// the type recorded for the expression.
Primitive(Location, Type, Primitive, Vec<ValueOrRef<Type>>),
/// A sequence of expressions, along with the type recorded for the block.
Block(Location, Type, Vec<Expression<Type>>),
/// Print the named variable.
Print(Location, Variable),
/// A binding; pretty-printed as `variable = expression`.
Bind(Location, Variable, Type, Box<Expression<Type>>),
}
impl<Type: Clone + TypeWithVoid> Expression<Type> {
    /// Return the type of the expression, as inferred or recently computed.
    ///
    /// The result is an owned clone of the type stored in the node. `Print`
    /// and `Bind` expressions carry no result type of their own here and
    /// report `Type::void()` instead.
    pub fn type_of(&self) -> Type {
        match self {
            Expression::Atomic(ValueOrRef::Ref(_, ty, _) | ValueOrRef::Value(_, ty, _))
            | Expression::Cast(_, ty, _)
            | Expression::Primitive(_, ty, _, _)
            | Expression::Block(_, ty, _) => ty.clone(),

            Expression::Print(..) | Expression::Bind(..) => Type::void(),
        }
    }

    /// Return a reference to the source location recorded for the expression.
    ///
    /// For `Atomic` expressions this is the location stored in the wrapped
    /// value or reference.
    pub fn location(&self) -> &Location {
        match self {
            Expression::Atomic(ValueOrRef::Ref(loc, ..) | ValueOrRef::Value(loc, ..))
            | Expression::Cast(loc, ..)
            | Expression::Primitive(loc, ..)
            | Expression::Block(loc, ..)
            | Expression::Print(loc, ..)
            | Expression::Bind(loc, ..) => loc,
        }
    }
}
/// Pretty-prints an expression.
///
/// * `Atomic` prints as its value or reference.
/// * `Cast` prints as `<TYPE>VALUE`.
/// * `Primitive` prints as `OPVALUE` with one operand, `(LEFT OP RIGHT)` with
///   two, and as a `!!...!!` diagnostic marker for any other operand count.
/// * `Block` prints as `()` when empty, as its sole expression when it has
///   exactly one, and otherwise as a brace-delimited list with `;` after every
///   expression but the last.
/// * `Print` prints as `print VAR`, and `Bind` as `VAR = EXPR`.
impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b Expression<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
    &'b Type: Pretty<'a, D, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            Expression::Atomic(atom) => atom.pretty(allocator),

            Expression::Cast(_, target, operand) => allocator
                .text("<")
                .append(target.pretty(allocator))
                .append(allocator.text(">"))
                .append(operand.pretty(allocator)),

            // Dispatch on the operand count via slice patterns.
            Expression::Primitive(_, _, op, operands) => match operands.as_slice() {
                [operand] => op.pretty(allocator).append(operand.pretty(allocator)),
                [lhs, rhs] => {
                    let left = lhs.pretty(allocator);
                    let right = rhs.pretty(allocator);

                    left.append(allocator.space())
                        .append(op.pretty(allocator))
                        .append(allocator.space())
                        .append(right)
                        .parens()
                }
                _ => allocator.text(format!(
                    "!!{:?} with {} arguments!!",
                    op,
                    operands.len()
                )),
            },

            Expression::Block(_, _, exprs) => match exprs.split_last() {
                None => allocator.text("()"),
                Some((only, [])) => only.pretty(allocator),
                Some((tail, leading)) => {
                    let opening = allocator.text("{").append(allocator.hardline());

                    // Every expression except the final one is terminated by `;`.
                    leading
                        .iter()
                        .fold(opening, |doc, stmt| {
                            doc.append(stmt.pretty(allocator))
                                .append(allocator.text(";"))
                                .append(allocator.hardline())
                        })
                        .append(tail.pretty(allocator))
                        .append(allocator.hardline())
                        .append(allocator.text("}"))
                }
            },

            Expression::Print(_, name) => allocator
                .text("print")
                .append(allocator.space())
                .append(allocator.text(name.as_ref().to_string())),

            Expression::Bind(_, name, _, bound) => allocator
                .text(name.as_ref().to_string())
                .append(allocator.space())
                .append(allocator.text("="))
                .append(allocator.space())
                .append(bound.pretty(allocator)),
        }
    }
}
/// The primitive operations available in the language.
///
/// Keeping these in an enumeration avoids a lot of "this should not happen"
/// cases, but might prove to be cumbersome in the future. If that happens,
/// this may either become a more hierarchical enumeration, or we'll just
/// deal with the "this should not happen" cases.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Primitive {
    Plus,
    Minus,
    Times,
    Divide,
}

/// Parses a primitive from its operator symbol (`+`, `-`, `*` or `/`).
///
/// Any other input yields an `Err` holding the message
/// `Illegal primitive <input>`.
impl FromStr for Primitive {
    type Err = String;

    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let primitive = match value {
            "+" => Primitive::Plus,
            "-" => Primitive::Minus,
            "*" => Primitive::Times,
            "/" => Primitive::Divide,
            _ => return Err(format!("Illegal primitive {}", value)),
        };

        Ok(primitive)
    }
}
/// Pretty-prints a primitive as its operator symbol.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Primitive
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        let symbol = match self {
            Primitive::Plus => "+",
            Primitive::Minus => "-",
            Primitive::Times => "*",
            Primitive::Divide => "/",
        };

        allocator.text(symbol)
    }
}
/// Displays a primitive by rendering its [`Pretty`] form at a width of 72 columns.
impl fmt::Display for Primitive {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let allocator = BoxAllocator;
        let doc = <&Primitive as Pretty<'_, BoxAllocator, ()>>::pretty(self, &allocator);
        doc.render_fmt(72, f)
    }
}
/// An expression that is always either a value or a reference.
///
/// This is the type used to guarantee that we don't nest expressions
/// at this level. Instead, expressions that take arguments take one
/// of these, which can only be a constant or a reference.
#[derive(Clone, Debug)]
pub enum ValueOrRef<Type> {
/// A constant, with its source location and the type recorded for it.
Value(Location, Type, Value),
/// A reference to a named variable, with its source location and the type
/// recorded for it.
Ref(Location, Type, ArcIntern<String>),
}
/// Pretty-prints a constant via its own [`Pretty`] form and a reference as the
/// referenced name. Location and type are not printed.
impl<'a, 'b, D, A, Type> Pretty<'a, D, A> for &'b ValueOrRef<Type>
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            ValueOrRef::Ref(_, _, name) => allocator.text(name.as_ref().to_string()),
            ValueOrRef::Value(_, _, constant) => constant.pretty(allocator),
        }
    }
}
impl<Type> From<ValueOrRef<Type>> for Expression<Type> {
fn from(value: ValueOrRef<Type>) -> Self {
Expression::Atomic(value)
}
}
/// A constant in the IR.
///
/// Each variant holds a fixed-width signed (`I*`) or unsigned (`U*`) integer.
///
/// The optional first field in each variant is the base that was used by the
/// user to input the number. By retaining it, we can ensure that if we need
/// to print the number back out, we can do so in the form that the user
/// entered it.
#[derive(Clone, Debug)]
pub enum Value {
I8(Option<u8>, i8),
I16(Option<u8>, i16),
I32(Option<u8>, i32),
I64(Option<u8>, i64),
U8(Option<u8>, u8),
U16(Option<u8>, u16),
U32(Option<u8>, u32),
U64(Option<u8>, u64),
}
impl Value {
    /// Return the IR type of this constant: the primitive type that matches
    /// the variant's width and signedness.
    pub fn type_of(&self) -> Type {
        let primitive = match self {
            Value::I8(..) => PrimitiveType::I8,
            Value::I16(..) => PrimitiveType::I16,
            Value::I32(..) => PrimitiveType::I32,
            Value::I64(..) => PrimitiveType::I64,
            Value::U8(..) => PrimitiveType::U8,
            Value::U16(..) => PrimitiveType::U16,
            Value::U32(..) => PrimitiveType::U32,
            Value::U64(..) => PrimitiveType::U64,
        };

        Type::Primitive(primitive)
    }
}
/// Pretty-prints a constant by delegating to the surface-syntax number printer,
/// passing along the base the user originally wrote (if any) and the
/// constant's type.
///
/// Signed constants are printed as their magnitude, preceded by `-` only when
/// the value is actually negative.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Value
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        // Prints an unsigned magnitude in the user's base, tagged with type `t`.
        let pretty_internal = |opt_base: &Option<u8>, x, t| {
            syntax::Value::Number(*opt_base, Some(t), x).pretty(allocator)
        };

        // Prints a signed number as an optional sign plus its magnitude.
        // `unsigned_abs` is used so that `i64::MIN` is handled without overflow.
        let pretty_internal_signed = |opt_base, x: i64, t| {
            let magnitude = pretty_internal(opt_base, x.unsigned_abs(), t);

            // Only negative values get a leading minus sign; previously the
            // sign was emitted unconditionally, so `5` was printed as `-5`.
            if x < 0 {
                allocator.text("-").append(magnitude)
            } else {
                magnitude
            }
        };

        match self {
            Value::I8(opt_base, value) => {
                pretty_internal_signed(opt_base, i64::from(*value), ConstantType::I8)
            }
            Value::I16(opt_base, value) => {
                pretty_internal_signed(opt_base, i64::from(*value), ConstantType::I16)
            }
            Value::I32(opt_base, value) => {
                pretty_internal_signed(opt_base, i64::from(*value), ConstantType::I32)
            }
            Value::I64(opt_base, value) => {
                pretty_internal_signed(opt_base, *value, ConstantType::I64)
            }
            Value::U8(opt_base, value) => {
                pretty_internal(opt_base, u64::from(*value), ConstantType::U8)
            }
            Value::U16(opt_base, value) => {
                pretty_internal(opt_base, u64::from(*value), ConstantType::U16)
            }
            Value::U32(opt_base, value) => {
                pretty_internal(opt_base, u64::from(*value), ConstantType::U32)
            }
            Value::U64(opt_base, value) => pretty_internal(opt_base, *value, ConstantType::U64),
        }
    }
}
/// A fully-resolved type in the IR: either a primitive type or a function type.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Type {
/// One of the built-in primitive types.
Primitive(PrimitiveType),
/// A function type, given as its argument types followed by its return type.
Function(Vec<Type>, Box<Type>),
}
/// Pretty-prints a type: primitives via their `Display` form, and functions as
/// `(ARG, ARG, ...) -> RET`.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Type
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            Type::Primitive(primitive) => allocator.text(primitive.to_string()),

            Type::Function(args, rettype) => {
                let parameters =
                    pretty_comma_separated(allocator, &args.iter().collect()).parens();

                parameters
                    .append(allocator.space())
                    .append(allocator.text("->"))
                    .append(allocator.space())
                    .append(rettype.pretty(allocator))
            }
        }
    }
}
/// Displays a type in a compact, single-line form.
///
/// Primitive types are shown using their own `Display` implementation.
/// Function types are shown as `(ARG,ARG,...)->RET`, with the argument list
/// enclosed in a balanced pair of parentheses.
impl fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Type::Primitive(pt) => fmt::Display::fmt(pt, f),
            Type::Function(args, ret) => {
                f.write_str("(")?;
                for (index, arg) in args.iter().enumerate() {
                    // Separate arguments with commas, with no trailing comma.
                    if index > 0 {
                        f.write_str(",")?;
                    }
                    fmt::Display::fmt(arg, f)?;
                }
                // Close the argument list before the arrow; the closing
                // parenthesis used to be missing entirely.
                f.write_str(")->")?;
                fmt::Display::fmt(ret, f)
            }
        }
    }
}
impl From<PrimitiveType> for Type {
fn from(value: PrimitiveType) -> Self {
Type::Primitive(value)
}
}
/// A type that may still contain type variables: a primitive type, a type
/// variable, or a function type built from further `TypeOrVar`s.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum TypeOrVar {
/// One of the built-in primitive types.
Primitive(PrimitiveType),
/// A named type variable, together with a source location.
Variable(Location, ArcIntern<String>),
/// A function type, given as its argument types followed by its return type.
Function(Vec<TypeOrVar>, Box<TypeOrVar>),
}
/// Pretty-prints a type-or-variable: primitives via their `Display` form,
/// variables by name, and functions as `(ARG, ARG, ...) -> RET`.
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b TypeOrVar
where
    A: 'a,
    D: ?Sized + DocAllocator<'a, A>,
{
    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
        match self {
            TypeOrVar::Primitive(primitive) => allocator.text(primitive.to_string()),
            TypeOrVar::Variable(_, name) => allocator.text(name.to_string()),

            TypeOrVar::Function(args, rettype) => {
                let parameters =
                    pretty_comma_separated(allocator, &args.iter().collect()).parens();

                parameters
                    .append(allocator.space())
                    .append(allocator.text("->"))
                    .append(allocator.space())
                    .append(rettype.pretty(allocator))
            }
        }
    }
}
/// Displays a type-or-variable in a compact, single-line form.
///
/// Primitive types use their own `Display` implementation and type variables
/// are shown by name. Function types are shown as
/// `<function:(ARG, ARG, ...)->RET>`; a function without arguments shows `()`
/// for its argument list.
///
/// Every write is checked, so an error reported by the underlying formatter is
/// propagated to the caller rather than being dropped.
impl fmt::Display for TypeOrVar {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            TypeOrVar::Primitive(x) => fmt::Display::fmt(x, f),
            TypeOrVar::Variable(_, v) => write!(f, "{}", v),
            TypeOrVar::Function(args, rettype) => {
                f.write_str("<function:(")?;
                for (index, arg) in args.iter().enumerate() {
                    // Arguments are separated by ", ", with no trailing separator.
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    write!(f, "{}", arg)?;
                }
                f.write_str(")->")?;
                fmt::Display::fmt(rettype, f)?;
                f.write_str(">")
            }
        }
    }
}
impl TypeOrVar {
    /// Create a fresh type variable, distinct from every type variable created
    /// before it.
    ///
    /// The variable's name comes from [`gensym`], so it is unique for the
    /// lifetime of the process unless the underlying counter overflows, which
    /// would take an enormous number of calls. The location attached to the
    /// variable is a manufactured one; if you want to specify a location, use
    /// [`TypeOrVar::new_located`].
    pub fn new() -> Self {
        let manufactured = Location::manufactured();
        Self::new_located(manufactured)
    }

    /// Create a fresh type variable, distinct from every type variable created
    /// before it, and attach the given location to it.
    ///
    /// The variable's name comes from [`gensym`], so it is unique for the
    /// lifetime of the process unless the underlying counter overflows, which
    /// would take an enormous number of calls.
    pub fn new_located(loc: Location) -> Self {
        let name = gensym("t");
        Self::Variable(loc, name)
    }
}
/// Type representations that have a distinguished "void" type.
///
/// `Expression::type_of` uses this to report a type for `Print` and `Bind`
/// expressions.
trait TypeWithVoid {
/// Build the void type in this representation.
fn void() -> Self;
}
/// For [`Type`], void is the primitive `Void` type.
impl TypeWithVoid for Type {
    fn void() -> Self {
        Self::Primitive(PrimitiveType::Void)
    }
}
/// For [`TypeOrVar`], void is the primitive `Void` type.
impl TypeWithVoid for TypeOrVar {
    fn void() -> Self {
        Self::Primitive(PrimitiveType::Void)
    }
}
//impl From<Type> for TypeOrVar {
// fn from(value: Type) -> Self {
// TypeOrVar::Type(value)
// }
//}
/// Converts anything that can become a [`Type`] into the equivalent
/// [`TypeOrVar`].
///
/// Primitive types map across directly; function types are converted
/// recursively, argument by argument and then the return type. The result
/// never contains a type variable.
impl<T: Into<Type>> From<T> for TypeOrVar {
    fn from(value: T) -> Self {
        let concrete: Type = value.into();

        match concrete {
            Type::Function(args, ret) => {
                let converted_args: Vec<TypeOrVar> =
                    args.into_iter().map(TypeOrVar::from).collect();
                let converted_ret = Box::new(TypeOrVar::from(*ret));

                TypeOrVar::Function(converted_args, converted_ret)
            }
            Type::Primitive(p) => TypeOrVar::Primitive(p),
        }
    }
}