From 736d27953f51207c6a0ec7821b53b68768540810 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 7 Oct 2023 11:06:28 +0200 Subject: [PATCH] Wire functions through everything, with some unimplemented, and add a basic scoped map. --- src/backend/into_crane.rs | 12 +++--- src/ir/ast.rs | 42 +++++++++++++++++++- src/ir/eval.rs | 10 +++-- src/ir/strings.rs | 13 +++++- src/lib.rs | 1 + src/repl.rs | 26 ++++++------ src/syntax.rs | 16 ++++---- src/syntax/arbitrary.rs | 34 ++++++++-------- src/syntax/ast.rs | 13 +++++- src/syntax/eval.rs | 15 ++++--- src/syntax/parser.lalrpop | 22 ++++++----- src/syntax/pretty.rs | 32 +++++++++++++-- src/syntax/validate.rs | 63 +++++++++++++++++++++++------ src/type_infer/ast.rs | 43 +++++++++++++++++++- src/type_infer/convert.rs | 48 ++++++++++++++++------ src/type_infer/finalize.rs | 17 ++++++-- src/util.rs | 1 + src/util/scoped_map.rs | 81 ++++++++++++++++++++++++++++++++++++++ 18 files changed, 392 insertions(+), 97 deletions(-) create mode 100644 src/util.rs create mode 100644 src/util/scoped_map.rs diff --git a/src/backend/into_crane.rs b/src/backend/into_crane.rs index e398e3d..4d3d7ea 100644 --- a/src/backend/into_crane.rs +++ b/src/backend/into_crane.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use crate::eval::PrimitiveType; -use crate::ir::{Expression, Primitive, Program, Statement, Type, Value, ValueOrRef}; +use crate::ir::{Expression, Primitive, Program, Statement, TopLevel, Type, Value, ValueOrRef}; use crate::syntax::ConstantType; use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir::{ @@ -120,12 +120,14 @@ impl Backend { // this is likely to become more cumbersome, and we'll want to separate // these off. But for now, given the amount of tables we keep around to track // state, it's easier to just include them. - for stmt in program.statements.drain(..) { - match stmt { + for item in program.items.drain(..) 
{ + match item { + TopLevel::Function(_, _, _) => unimplemented!(), + // Print statements are fairly easy to compile: we just lookup the // output buffer, the address of the string to print, and the value // of whatever variable we're printing. Then we just call print. - Statement::Print(ann, t, var) => { + TopLevel::Statement(Statement::Print(ann, t, var)) => { // Get the output buffer (or null) from our general compilation context. let buffer_ptr = self.output_buffer_ptr(); let buffer_ptr = builder.ins().iconst(types::I64, buffer_ptr as i64); @@ -163,7 +165,7 @@ impl Backend { } // Variable binding is a little more con - Statement::Binding(_, var_name, _, value) => { + TopLevel::Statement(Statement::Binding(_, var_name, _, value)) => { // Kick off to the `Expression` implementation to see what value we're going // to bind to this variable. let (val, etype) = diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 478f393..24c408c 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -31,7 +31,7 @@ type Variable = ArcIntern; pub struct Program { // For now, a program is just a vector of statements. In the future, we'll probably // extend this to include a bunch of other information, but for now: just a list. - pub(crate) statements: Vec, + pub(crate) items: Vec, } impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program @@ -42,7 +42,7 @@ where fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { let mut result = allocator.nil(); - for stmt in self.statements.iter() { + for stmt in self.items.iter() { // there's probably a better way to do this, rather than constantly // adding to the end, but this works. result = result @@ -69,6 +69,44 @@ impl Arbitrary for Program { } } +/// A thing that can sit at the top level of a file. +/// +/// For the moment, these are statements and functions. 
Other things +/// will likely be added in the future, but for now: just statements +/// and functions +#[derive(Debug)] +pub enum TopLevel { + Statement(Statement), + Function(Variable, Vec, Expression), +} + +impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b TopLevel +where + A: 'a, + D: ?Sized + DocAllocator<'a, A>, +{ + fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { + match self { + TopLevel::Function(name, args, body) => allocator + .text("function") + .append(allocator.space()) + .append(allocator.text(name.as_ref().to_string())) + .append( + allocator + .intersperse( + args.iter().map(|x| allocator.text(x.as_ref().to_string())), + ", ", + ) + .parens(), + ) + .append(allocator.space()) + .append(body.pretty(allocator)), + + TopLevel::Statement(stmt) => stmt.pretty(allocator), + } + } +} + /// The representation of a statement in the language. /// /// For now, this is either a binding site (`x = 4`) or a print statement diff --git a/src/ir/eval.rs b/src/ir/eval.rs index 6841508..46123f3 100644 --- a/src/ir/eval.rs +++ b/src/ir/eval.rs @@ -1,5 +1,5 @@ use crate::eval::{EvalEnvironment, EvalError, Value}; -use crate::ir::{Expression, Program, Statement}; +use crate::ir::{Expression, Program, Statement, TopLevel}; use super::{Primitive, Type, ValueOrRef}; @@ -12,14 +12,16 @@ impl Program { let mut env = EvalEnvironment::empty(); let mut stdout = String::new(); - for stmt in self.statements.iter() { + for stmt in self.items.iter() { match stmt { - Statement::Binding(_, name, _, value) => { + TopLevel::Function(_, _, _) => unimplemented!(), + + TopLevel::Statement(Statement::Binding(_, name, _, value)) => { let actual_value = value.eval(&env)?; env = env.extend(name.clone(), actual_value); } - Statement::Print(_, _, name) => { + TopLevel::Statement(Statement::Print(_, _, name)) => { let value = env.lookup(name.clone())?; let line = format!("{} = {}\n", name, value); stdout.push_str(&line); diff --git a/src/ir/strings.rs b/src/ir/strings.rs index 
70f939f..6326672 100644 --- a/src/ir/strings.rs +++ b/src/ir/strings.rs @@ -1,4 +1,4 @@ -use super::ast::{Expression, Program, Statement}; +use super::ast::{Expression, Program, Statement, TopLevel}; use internment::ArcIntern; use std::collections::HashSet; @@ -10,7 +10,7 @@ impl Program { pub fn strings(&self) -> HashSet> { let mut result = HashSet::new(); - for stmt in self.statements.iter() { + for stmt in self.items.iter() { stmt.register_strings(&mut result); } @@ -18,6 +18,15 @@ impl Program { } } +impl TopLevel { + fn register_strings(&self, string_set: &mut HashSet>) { + match self { + TopLevel::Function(_, _, body) => body.register_strings(string_set), + TopLevel::Statement(stmt) => stmt.register_strings(string_set), + } + } +} + impl Statement { fn register_strings(&self, string_set: &mut HashSet>) { match self { diff --git a/src/lib.rs b/src/lib.rs index c155075..23f924c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -68,6 +68,7 @@ mod examples; pub mod ir; pub mod syntax; pub mod type_infer; +pub mod util; /// Implementation module for the high-level compiler. mod compiler; diff --git a/src/repl.rs b/src/repl.rs index fcf6b62..1600a0f 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -1,13 +1,13 @@ use crate::backend::{Backend, BackendError}; -use crate::syntax::{ConstantType, Location, ParserError, Statement}; +use crate::syntax::{ConstantType, Location, ParserError, Statement, TopLevel}; use crate::type_infer::TypeInferenceResult; +use crate::util::scoped_map::ScopedMap; use codespan_reporting::diagnostic::Diagnostic; use codespan_reporting::files::SimpleFiles; use codespan_reporting::term::{self, Config}; use cranelift_jit::JITModule; use cranelift_module::ModuleError; use pretty::termcolor::{ColorChoice, StandardStream}; -use std::collections::HashMap; /// A high-level REPL helper for NGR. 
/// @@ -23,7 +23,7 @@ use std::collections::HashMap; pub struct REPL { file_database: SimpleFiles, jitter: Backend, - variable_binding_sites: HashMap, + variable_binding_sites: ScopedMap, console: StandardStream, console_config: Config, } @@ -70,7 +70,7 @@ impl REPL { Ok(REPL { file_database: SimpleFiles::new(), jitter: Backend::jit(None)?, - variable_binding_sites: HashMap::new(), + variable_binding_sites: ScopedMap::new(), console, console_config, }) @@ -127,10 +127,14 @@ impl REPL { .get(entry) .expect("entry exists") .source(); - let syntax = Statement::parse(entry, source)?; + let syntax = TopLevel::parse(entry, source)?; let program = match syntax { - Statement::Binding(loc, name, expr) => { + TopLevel::Function(_, _, _) => { + unimplemented!() + } + + TopLevel::Statement(Statement::Binding(loc, name, expr)) => { // if this is a variable binding, and we've never defined this variable before, // we should tell cranelift about it. this is optimistic; if we fail to compile, // then we won't use this definition until someone tries again. @@ -141,15 +145,15 @@ impl REPL { } crate::syntax::Program { - statements: vec![ - Statement::Binding(loc.clone(), name.clone(), expr), - Statement::Print(loc, name), + items: vec![ + TopLevel::Statement(Statement::Binding(loc.clone(), name.clone(), expr)), + TopLevel::Statement(Statement::Print(loc, name)), ], } } - nonbinding => crate::syntax::Program { - statements: vec![nonbinding], + TopLevel::Statement(nonbinding) => crate::syntax::Program { + items: vec![TopLevel::Statement(nonbinding)], }, }; diff --git a/src/syntax.rs b/src/syntax.rs index 8d505a1..2a8acc8 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -8,7 +8,7 @@ //! //! * Turning the string into a series of language-specific [`Token`]s. //! * Taking those tokens, and computing a basic syntax tree from them, -//! using our parser ([`ProgramParser`] or [`StatementParser`], generated +//! using our parser ([`ProgramParser`] or [`TopLevelParser`], generated //! 
by [`lalrpop`](https://lalrpop.github.io/lalrpop/)). //! * Validating the tree we have parsed, using [`Program::validate`], //! returning any warnings or errors we have found. @@ -44,7 +44,7 @@ mod validate; use crate::syntax::arbitrary::GenerationEnvironment; pub use crate::syntax::ast::*; pub use crate::syntax::location::Location; -pub use crate::syntax::parser::{ProgramParser, StatementParser}; +pub use crate::syntax::parser::{ProgramParser, TopLevelParser}; pub use crate::syntax::tokens::{LexerError, Token}; #[cfg(test)] use ::pretty::{Arena, Pretty}; @@ -243,18 +243,18 @@ impl Program { } } -impl Statement { - /// Parse a statement that you have in memory, using the given index for [`Location`]s. +impl TopLevel { + /// Parse a top-level item that you have in memory, using the given index for [`Location`]s. /// /// As with [`Program::parse`], if you use a bad file index, you'll get weird behaviors /// when you try to print errors, but things should otherwise work fine. This function /// will only parse a single statement, which is useful in the REPL, but probably shouldn't /// be used when reading in whole files. 
- pub fn parse(file_idx: usize, buffer: &str) -> Result { + pub fn parse(file_idx: usize, buffer: &str) -> Result { let lexer = Token::lexer(buffer) .spanned() .map(|(token, range)| (range.start, token, range.end)); - StatementParser::new() + TopLevelParser::new() .parse(file_idx, lexer) .map_err(|e| ParserError::convert(file_idx, e)) } @@ -276,7 +276,7 @@ fn order_of_operations() { assert_eq!( Program::from_str(muladd1).unwrap(), Program { - statements: vec![Statement::Binding( + items: vec![TopLevel::Statement(Statement::Binding( Location::new(testfile, 0..1), Name::manufactured("x"), Expression::Primitive( @@ -303,7 +303,7 @@ fn order_of_operations() { ) ] ) - ),], + ))], } ); } diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs index 93759e9..7ee27a1 100644 --- a/src/syntax/arbitrary.rs +++ b/src/syntax/arbitrary.rs @@ -1,4 +1,4 @@ -use crate::syntax::ast::{ConstantType, Expression, Name, Program, Statement, Value}; +use crate::syntax::ast::{ConstantType, Expression, Name, Program, Statement, TopLevel, Value}; use crate::syntax::location::Location; use proptest::sample::select; use proptest::{ @@ -57,38 +57,40 @@ impl Arbitrary for Program { fn arbitrary_with(genenv: Self::Parameters) -> Self::Strategy { proptest::collection::vec( - ProgramStatementInfo::arbitrary(), + ProgramTopLevelInfo::arbitrary(), genenv.block_length.clone(), ) - .prop_flat_map(move |mut items| { - let mut statements = Vec::new(); + .prop_flat_map(move |mut ptlis| { + let mut items = Vec::new(); let mut genenv = genenv.clone(); - for psi in items.drain(..) { + for psi in ptlis.drain(..) 
{ if genenv.bindings.is_empty() || psi.should_be_binding { genenv.return_type = psi.binding_type; let expr = Expression::arbitrary_with(genenv.clone()); genenv.bindings.insert(psi.name.clone(), psi.binding_type); - statements.push( + items.push( expr.prop_map(move |expr| { - Statement::Binding(Location::manufactured(), psi.name.clone(), expr) + TopLevel::Statement(Statement::Binding( + Location::manufactured(), + psi.name.clone(), + expr, + )) }) .boxed(), ); } else { let printers = genenv.bindings.keys().map(|n| { - Just(Statement::Print( + Just(TopLevel::Statement(Statement::Print( Location::manufactured(), Name::manufactured(n), - )) + ))) }); - statements.push(Union::new(printers).boxed()); + items.push(Union::new(printers).boxed()); } } - statements - .prop_map(|statements| Program { statements }) - .boxed() + items.prop_map(|items| Program { items }).boxed() }) .boxed() } @@ -104,13 +106,13 @@ impl Arbitrary for Name { } #[derive(Debug)] -struct ProgramStatementInfo { +struct ProgramTopLevelInfo { should_be_binding: bool, name: Name, binding_type: ConstantType, } -impl Arbitrary for ProgramStatementInfo { +impl Arbitrary for ProgramTopLevelInfo { type Parameters = (); type Strategy = BoxedStrategy; @@ -121,7 +123,7 @@ impl Arbitrary for ProgramStatementInfo { ConstantType::arbitrary(), ) .prop_map( - |(should_be_binding, name, binding_type)| ProgramStatementInfo { + |(should_be_binding, name, binding_type)| ProgramTopLevelInfo { should_be_binding, name, binding_type, diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 6700a9f..4749acd 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -16,7 +16,18 @@ use crate::syntax::Location; /// `validate` and it comes back without errors. #[derive(Clone, Debug, PartialEq)] pub struct Program { - pub statements: Vec, + pub items: Vec, +} + +/// A thing that can sit at the top level of a file. +/// +/// For the moment, these are statements and functions. 
Other things +/// will likely be added in the future, but for now: just statements +/// and functions +#[derive(Clone, Debug, PartialEq)] +pub enum TopLevel { + Statement(Statement), + Function(Name, Vec, Expression), } /// A Name. diff --git a/src/syntax/eval.rs b/src/syntax/eval.rs index d6fda74..928d182 100644 --- a/src/syntax/eval.rs +++ b/src/syntax/eval.rs @@ -1,7 +1,7 @@ use internment::ArcIntern; use crate::eval::{EvalEnvironment, EvalError, PrimitiveType, Value}; -use crate::syntax::{ConstantType, Expression, Program, Statement}; +use crate::syntax::{ConstantType, Expression, Program, Statement, TopLevel}; use std::str::FromStr; impl Program { @@ -19,16 +19,19 @@ impl Program { let mut env = EvalEnvironment::empty(); let mut stdout = String::new(); - for stmt in self.statements.iter() { - // at this point, evaluation is pretty simple. just walk through each - // statement, in order, and record printouts as we come to them. + for stmt in self.items.iter() { match stmt { - Statement::Binding(_, name, value) => { + TopLevel::Function(_name, _arg_names, _body) => { + unimplemented!() + } + // at this point, evaluation is pretty simple. just walk through each + // statement, in order, and record printouts as we come to them. + TopLevel::Statement(Statement::Binding(_, name, value)) => { let actual_value = value.eval(&env)?; env = env.extend(name.clone().intern(), actual_value); } - Statement::Print(_, name) => { + TopLevel::Statement(Statement::Print(_, name)) => { let value = env.lookup(name.clone().intern())?; let line = format!("{} = {}\n", name, value); stdout.push_str(&line); diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 3c1d7f8..2a0d882 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -9,7 +9,7 @@ //! eventually want to leave lalrpop behind.) //! 
use crate::syntax::{LexerError, Location}; -use crate::syntax::ast::{Program,Statement,Expression,Value,Name}; +use crate::syntax::ast::{Program,TopLevel,Statement,Expression,Value,Name}; use crate::syntax::tokens::{ConstantType, Token}; use internment::ArcIntern; @@ -57,21 +57,25 @@ extern { pub Program: Program = { // a program is just a set of statements - => Program { - statements: stmts + => Program { + items } } -ProgramTopLevel: Vec = { - Function => unimplemented!(), - => { - rest.push(next); +ProgramTopLevel: Vec = { + => { + rest.push(t); rest }, => Vec::new(), } -Function: () = { +pub TopLevel: TopLevel = { + => f, + => TopLevel::Statement(s), +} + +Function: TopLevel = { "function" "(" Arguments OptionalComma ")" Expression => unimplemented!(), } @@ -123,7 +127,7 @@ Statements: Vec = { } } -pub Statement: Statement = { +Statement: Statement = { // A statement can be a variable binding. Note, here, that we use this // funny @L thing to get the source location before the variable, so that // we can say that this statement spans across everything. 
diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index 6a9338f..b0b91cb 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -1,7 +1,7 @@ use crate::syntax::ast::{Expression, Program, Statement, Value}; use pretty::{DocAllocator, DocBuilder, Pretty}; -use super::ConstantType; +use super::{ConstantType, TopLevel}; impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program where @@ -11,9 +11,9 @@ where fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> { let mut result = allocator.nil(); - for stmt in self.statements.iter() { + for tl in self.items.iter() { result = result - .append(stmt.pretty(allocator)) + .append(tl.pretty(allocator)) .append(allocator.text(";")) .append(allocator.hardline()); } @@ -22,6 +22,32 @@ where } } +impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b TopLevel +where + A: 'a, + D: ?Sized + DocAllocator<'a, A>, +{ + fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> { + match self { + TopLevel::Statement(stmt) => stmt.pretty(allocator), + TopLevel::Function(name, arg_names, body) => allocator + .text("function") + .append(allocator.space()) + .append(allocator.text(name.to_string())) + .append( + allocator + .intersperse( + arg_names.iter().map(|x| allocator.text(x.to_string())), + CommaSep {}, + ) + .parens(), + ) + .append(allocator.space()) + .append(body.pretty(allocator)), + } + } +} + impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Statement where A: 'a, diff --git a/src/syntax/validate.rs b/src/syntax/validate.rs index 30afe5c..58d25e8 100644 --- a/src/syntax/validate.rs +++ b/src/syntax/validate.rs @@ -1,9 +1,10 @@ use crate::{ eval::PrimitiveType, - syntax::{Expression, Location, Program, Statement}, + syntax::{Expression, Location, Program, Statement, TopLevel}, + util::scoped_map::ScopedMap, }; use codespan_reporting::diagnostic::Diagnostic; -use std::{collections::HashMap, str::FromStr}; +use std::str::FromStr; /// An error we found while validating the input program. 
/// @@ -65,7 +66,7 @@ impl Program { /// example, and generates warnings for things that are inadvisable but not /// actually a problem. pub fn validate(&self) -> (Vec, Vec) { - let mut bound_variables = HashMap::new(); + let mut bound_variables = ScopedMap::new(); self.validate_with_bindings(&mut bound_variables) } @@ -76,13 +77,13 @@ impl Program { /// actually a problem. pub fn validate_with_bindings( &self, - bound_variables: &mut HashMap, + bound_variables: &mut ScopedMap, ) -> (Vec, Vec) { let mut errors = vec![]; let mut warnings = vec![]; - for stmt in self.statements.iter() { - let (mut new_errors, mut new_warnings) = stmt.validate(bound_variables); + for stmt in self.items.iter() { + let (mut new_errors, mut new_warnings) = stmt.validate_with_bindings(bound_variables); errors.append(&mut new_errors); warnings.append(&mut new_warnings); } @@ -91,6 +92,44 @@ impl Program { } } +impl TopLevel { + /// Validate that the top level item makes semantic sense, not just syntactic + /// sense. + /// + /// This checks for things like references to variables that don't exist, for + /// example, and generates warnings for things that are inadvisable but not + /// actually a problem. + pub fn validate(&self) -> (Vec, Vec) { + let mut bound_variables = ScopedMap::new(); + self.validate_with_bindings(&mut bound_variables) + } + + /// Validate that the top level item makes semantic sense, not just syntactic + /// sense. + /// + /// This checks for things like references to variables that don't exist, for + /// example, and generates warnings for things that are inadvisable but not + /// actually a problem.
+ pub fn validate_with_bindings( + &self, + bound_variables: &mut ScopedMap, + ) -> (Vec, Vec) { + match self { + TopLevel::Function(name, arguments, body) => { + bound_variables.new_scope(); + bound_variables.insert(name.name.clone(), name.location.clone()); + for arg in arguments.iter() { + bound_variables.insert(arg.name.clone(), arg.location.clone()); + } + let result = body.validate(&bound_variables); + bound_variables.release_scope(); + result + } + TopLevel::Statement(stmt) => stmt.validate(bound_variables), + } + } +} + impl Statement { /// Validate that the statement makes semantic sense, not just syntactic sense. /// @@ -103,7 +142,7 @@ impl Statement { /// and warnings. fn validate( &self, - bound_variables: &mut HashMap, + bound_variables: &mut ScopedMap, ) -> (Vec, Vec) { let mut errors = vec![]; let mut warnings = vec![]; @@ -139,7 +178,7 @@ impl Statement { } impl Expression { - fn validate(&self, variable_map: &HashMap) -> (Vec, Vec) { + fn validate(&self, variable_map: &ScopedMap) -> (Vec, Vec) { match self { Expression::Value(_, _) => (vec![], vec![]), Expression::Reference(_, var) if variable_map.contains_key(var) => (vec![], vec![]), @@ -174,14 +213,14 @@ impl Expression { #[test] fn cast_checks_are_reasonable() { - let good_stmt = Statement::parse(0, "x = 4u8;").expect("valid test case"); - let (good_errs, good_warns) = good_stmt.validate(&mut HashMap::new()); + let good_stmt = TopLevel::parse(0, "x = 4u8;").expect("valid test case"); + let (good_errs, good_warns) = good_stmt.validate(); assert!(good_errs.is_empty()); assert!(good_warns.is_empty()); - let bad_stmt = Statement::parse(0, "x = 4u8;").expect("valid test case"); - let (bad_errs, bad_warns) = bad_stmt.validate(&mut HashMap::new()); + let bad_stmt = TopLevel::parse(0, "x = 4u8;").expect("valid test case"); + let (bad_errs, bad_warns) = bad_stmt.validate(); assert!(bad_warns.is_empty()); assert_eq!(bad_errs.len(), 1); diff --git a/src/type_infer/ast.rs b/src/type_infer/ast.rs index 
ff91a00..373690c 100644 --- a/src/type_infer/ast.rs +++ b/src/type_infer/ast.rs @@ -36,7 +36,7 @@ type Variable = ArcIntern; pub struct Program { // For now, a program is just a vector of statements. In the future, we'll probably // extend this to include a bunch of other information, but for now: just a list. - pub(crate) statements: Vec, + pub(crate) items: Vec, } impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program @@ -47,7 +47,7 @@ where fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { let mut result = allocator.nil(); - for stmt in self.statements.iter() { + for stmt in self.items.iter() { // there's probably a better way to do this, rather than constantly // adding to the end, but this works. result = result @@ -60,6 +60,45 @@ where } } +/// A thing that can sit at the top level of a file. +/// +/// For the moment, these are statements and functions. Other things +/// will likely be added in the future, but for now: just statements +/// and functions +#[derive(Debug)] +pub enum TopLevel { + Statement(Statement), + Function(Variable, Vec, Expression), +} + +impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b TopLevel +where + A: 'a, + D: ?Sized + DocAllocator<'a, A>, +{ + fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { + match self { + TopLevel::Function(name, args, body) => allocator + .text("function") + .append(allocator.space()) + .append(allocator.text(name.as_ref().to_string())) + .append(allocator.space()) + .append( + allocator + .intersperse( + args.iter().map(|x| allocator.text(x.as_ref().to_string())), + ", ", + ) + .parens(), + ) + .append(allocator.space()) + .append(body.pretty(allocator)), + + TopLevel::Statement(stmt) => stmt.pretty(allocator), + } + } +} + /// The representation of a statement in the language. 
/// /// For now, this is either a binding site (`x = 4`) or a print statement diff --git a/src/type_infer/convert.rs b/src/type_infer/convert.rs index e53cdb6..56c5732 100644 --- a/src/type_infer/convert.rs +++ b/src/type_infer/convert.rs @@ -18,20 +18,42 @@ pub fn convert_program( mut program: syntax::Program, constraint_db: &mut Vec, ) -> ir::Program { - let mut statements = Vec::new(); + let mut items = Vec::new(); let mut renames = HashMap::new(); let mut bindings = HashMap::new(); - for stmt in program.statements.drain(..) { - statements.append(&mut convert_statement( - stmt, + for item in program.items.drain(..) { + items.append(&mut convert_top_level( + item, constraint_db, &mut renames, &mut bindings, )); } - ir::Program { statements } + ir::Program { items } +} + +/// This function takes a top-level item and converts it into the IR version of the +/// program, with all the appropriate type variables introduced and their constraints +/// added to the given database. +pub fn convert_top_level( + top_level: syntax::TopLevel, + constraint_db: &mut Vec, + renames: &mut HashMap, ArcIntern>, + bindings: &mut HashMap, Type>, +) -> Vec { + match top_level { + syntax::TopLevel::Function(_, _arg_name, _) => { + unimplemented!() + } + syntax::TopLevel::Statement(stmt) => { + convert_statement(stmt, constraint_db, renames, bindings) + .drain(..) 
+ .map(ir::TopLevel::Statement) + .collect() + } + } } /// This function takes a syntactic statements and converts it into a series of @@ -269,11 +291,11 @@ mod tests { (expr, stmts, constraints, ty) } - fn infer_statement(x: syntax::Statement) -> (Vec, Vec) { + fn infer_top_level(x: syntax::TopLevel) -> (Vec, Vec) { let mut constraints = Vec::new(); let mut renames = HashMap::new(); let mut bindings = HashMap::new(); - let res = convert_statement(x, &mut constraints, &mut renames, &mut bindings); + let res = convert_top_level(x, &mut constraints, &mut renames, &mut bindings); (res, constraints) } @@ -321,24 +343,24 @@ mod tests { #[test] fn one_plus_one_plus_one() { - let stmt = syntax::Statement::parse(1, "x = 1 + 1 + 1;").expect("basic parse"); - let (stmts, constraints) = infer_statement(stmt); + let stmt = syntax::TopLevel::parse(1, "x = 1 + 1 + 1;").expect("basic parse"); + let (stmts, constraints) = infer_top_level(stmt); assert_eq!(stmts.len(), 2); - let ir::Statement::Binding( + let ir::TopLevel::Statement(ir::Statement::Binding( _args, name1, temp_ty1, ir::Expression::Primitive(_, primty1, ir::Primitive::Plus, primargs1), - ) = stmts.get(0).expect("item two") + )) = stmts.get(0).expect("item two") else { panic!("Failed to match first statement"); }; - let ir::Statement::Binding( + let ir::TopLevel::Statement(ir::Statement::Binding( _args, name2, temp_ty2, ir::Expression::Primitive(_, primty2, ir::Primitive::Plus, primargs2), - ) = stmts.get(1).expect("item two") + )) = stmts.get(1).expect("item two") else { panic!("Failed to match second statement"); }; diff --git a/src/type_infer/finalize.rs b/src/type_infer/finalize.rs index 9c70944..7f8f5d6 100644 --- a/src/type_infer/finalize.rs +++ b/src/type_infer/finalize.rs @@ -6,14 +6,25 @@ pub fn finalize_program( resolutions: &TypeResolutions, ) -> output::Program { output::Program { - statements: program - .statements + items: program + .items .drain(..) 
+ .map(|x| finalize_top_level(x, resolutions)) .collect(), } } +fn finalize_top_level(item: input::TopLevel, resolutions: &TypeResolutions) -> output::TopLevel { + match item { + input::TopLevel::Function(name, args, body) => { + output::TopLevel::Function(name, args, finalize_expression(body, resolutions)) + } + input::TopLevel::Statement(stmt) => { + output::TopLevel::Statement(finalize_statement(stmt, resolutions)) + } + } +} + fn finalize_statement( statement: input::Statement, resolutions: &TypeResolutions, diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..b7271cc --- /dev/null +++ b/src/util.rs @@ -0,0 +1 @@ +pub mod scoped_map; diff --git a/src/util/scoped_map.rs b/src/util/scoped_map.rs new file mode 100644 index 0000000..dc8a9a1 --- /dev/null +++ b/src/util/scoped_map.rs @@ -0,0 +1,81 @@ +use std::{borrow::Borrow, collections::HashMap, hash::Hash}; + +/// A version of [`std::collections::HashMap`] with a built-in notion of scope. +pub struct ScopedMap { + scopes: Vec>, +} + +impl ScopedMap { + /// Generate a new scoped map. + /// + /// In addition to generating the map structure, this method also generates + /// an initial scope for use by the caller. + pub fn new() -> ScopedMap { + ScopedMap { + scopes: vec![HashMap::new()], + } + } + + /// Get a value from the scoped map. + pub fn get(&self, k: &Q) -> Option<&V> + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + for map in self.scopes.iter().rev() { + match map.get(k) { + None => continue, + Some(v) => return Some(v), + } + } + None + } + + /// Returns true if the map contains the given key. + pub fn contains_key(&self, k: &K) -> bool { + self.scopes.iter().any(|x| x.contains_key(k)) + } + + /// Insert a value into the current binding scope. + /// + /// If this variable is bound in the current scope, then its value will be + /// overridden.
If it's bound in a previous scope, however, that value will + /// be shadowed, so that its value will be preserved if/when the current scope + /// is popped. + pub fn insert(&mut self, k: K, v: V) { + self.scopes + .last_mut() + .expect("tried to insert into ScopedMap with no scopes") + .insert(k, v); + } + + /// Create a new scope. + /// + /// Modifications to this scope will shadow all previous scopes without + /// modifying them. Consider the following examples: + /// + /// ``` + /// use ngr::util::scoped_map::ScopedMap; + /// + /// let mut example1 = ScopedMap::new(); + /// example1.insert(1, true); + /// example1.insert(1, false); + /// assert_eq!(Some(&false), example1.get(&1)); + /// let mut example2 = ScopedMap::new(); + /// example2.insert(1, true); + /// example2.new_scope(); + /// example2.insert(1, false); + /// assert_eq!(Some(&false), example2.get(&1)); + /// example2.release_scope().expect("scope releases"); + /// assert_eq!(Some(&true), example2.get(&1)); + /// ``` + pub fn new_scope(&mut self) { + self.scopes.push(HashMap::new()); + } + + /// Pop the current scope, returning to whatever was bound in the previous + /// scope. If there are no scopes left to pop, `None` will be returned. + pub fn release_scope(&mut self) -> Option> { + self.scopes.pop() + } +}