From 4c2850427a896e7320bd7e6f11fc8a0dc7d950fd Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Mon, 29 Apr 2024 21:38:17 -0700 Subject: [PATCH] Start isolating names into their own thing. --- src/lambda_lift.rs | 8 +++ src/lib.rs | 1 + src/repl.rs | 6 +- src/syntax.rs | 2 + src/syntax/arbitrary.rs | 15 ++--- src/syntax/ast.rs | 66 ++-------------------- src/syntax/eval.rs | 18 +++--- src/syntax/name.rs | 113 ++++++++++++++++++++++++++++++++++++++ src/syntax/parser.lalrpop | 5 +- src/syntax/pretty.rs | 4 +- src/syntax/validate.rs | 12 ++-- src/type_infer/convert.rs | 37 +++++++------ 12 files changed, 176 insertions(+), 111 deletions(-) create mode 100644 src/lambda_lift.rs create mode 100644 src/syntax/name.rs diff --git a/src/lambda_lift.rs b/src/lambda_lift.rs new file mode 100644 index 0000000..8880273 --- /dev/null +++ b/src/lambda_lift.rs @@ -0,0 +1,8 @@ +use crate::syntax::{Expression, Name}; +use std::collections::{HashSet, HashMap}; + +impl Expression { + fn free_variables(&self) -> HashSet { + unimplemented!() + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 23f924c..fcb21f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,6 +66,7 @@ pub mod eval; #[cfg(test)] mod examples; pub mod ir; +pub mod lambda_lift; pub mod syntax; pub mod type_infer; pub mod util; diff --git a/src/repl.rs b/src/repl.rs index d2730ef..473dfa4 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -134,8 +134,8 @@ impl REPL { // if this is a variable binding, and we've never defined this variable before, // we should tell cranelift about it. this is optimistic; if we fail to compile, // then we won't use this definition until someone tries again. - if !self.variable_binding_sites.contains_key(&name.name) { - self.jitter.define_string(&name.name)?; + if !self.variable_binding_sites.contains_key(&name.current_name().to_string()) { + self.jitter.define_string(name.current_name())?; self.jitter .define_variable(name.to_string(), ConstantType::U64)?; } @@ -149,7 +149,7 @@ impl REPL { loc.clone(), crate::syntax::Name::manufactured("print"), )), - vec![Expression::Reference(loc.clone(), name.name)], + vec![Expression::Reference(name.clone())], )), ], } diff --git a/src/syntax.rs b/src/syntax.rs index 309f7ec..1e279d5 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -31,6 +31,7 @@ pub mod arbitrary; mod ast; pub mod eval; mod location; +mod name; mod tokens; lalrpop_mod!( #[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)] @@ -44,6 +45,7 @@ mod validate; use crate::syntax::arbitrary::GenerationEnvironment; pub use crate::syntax::ast::*; pub use crate::syntax::location::Location; +pub use crate::syntax::name::Name; pub use crate::syntax::parser::{ProgramParser, TopLevelParser}; pub use crate::syntax::tokens::{LexerError, Token}; use lalrpop_util::ParseError; diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs index e9ab301..c87d941 100644 --- a/src/syntax/arbitrary.rs +++ b/src/syntax/arbitrary.rs @@ -1,4 +1,5 @@ -use crate::syntax::ast::{ConstantType, Expression, Name, Program, TopLevel, Value}; +use crate::syntax::ast::{ConstantType, Expression, Program, TopLevel, Value}; +use crate::syntax::name::Name; use crate::syntax::location::Location; use proptest::sample::select; use proptest::{ @@ -88,10 +89,7 @@ impl Arbitrary for Program { Location::manufactured(), Name::manufactured("print"), )), - vec![Expression::Reference( - Location::manufactured(), - n.to_string(), - )], + vec![Expression::Reference(n.clone())], ))) }); items.push(Union::new(printers).boxed()); @@ -168,12 +166,7 @@ impl Arbitrary for Expression { } else { let mut strats = bound_variables_of_type .drain(..) - .map(|x| { - Just(Expression::Reference( - Location::manufactured(), - x.name.clone(), - )) - .boxed() + .map(|x| { Just(Expression::Reference(x.clone())).boxed() }) .collect::>(); strats.push(value_strategy); diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index e41d443..c0fa09f 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,10 +1,6 @@ -use std::fmt; -use std::hash::Hash; - -use internment::ArcIntern; - -pub use crate::syntax::tokens::ConstantType; +use crate::syntax::name::Name; use crate::syntax::Location; +pub use crate::syntax::tokens::ConstantType; /// A structure represented a parsed program. /// @@ -30,56 +26,6 @@ pub enum TopLevel { Structure(Location, Name, Vec<(Name, Type)>), } -/// A Name. -/// -/// This is basically a string, but annotated with the place the string -/// is in the source file. -#[derive(Clone, Debug)] -pub struct Name { - pub name: String, - pub location: Location, -} - -impl Name { - pub fn new(n: S, location: Location) -> Name { - Name { - name: n.to_string(), - location, - } - } - - pub fn manufactured(n: S) -> Name { - Name { - name: n.to_string(), - location: Location::manufactured(), - } - } - - pub fn intern(self) -> ArcIntern { - ArcIntern::new(self.name) - } -} - -impl PartialEq for Name { - fn eq(&self, other: &Self) -> bool { - self.name == other.name - } -} - -impl Eq for Name {} - -impl Hash for Name { - fn hash(&self, state: &mut H) { - self.name.hash(state) - } -} - -impl fmt::Display for Name { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.name.fmt(f) - } -} - /// An expression in the underlying syntax. /// /// Like statements, these expressions are guaranteed to have been @@ -90,7 +36,7 @@ impl fmt::Display for Name { pub enum Expression { Value(Location, Value), Constructor(Location, Name, Vec<(Name, Expression)>), - Reference(Location, String), + Reference(Name), FieldRef(Location, Box, Name), Cast(Location, String, Box), Primitive(Location, Name), @@ -130,8 +76,8 @@ impl PartialEq for Expression { Expression::Constructor(_, name2, fields2) => name1 == name2 && fields1 == fields2, _ => false, }, - Expression::Reference(_, var1) => match other { - Expression::Reference(_, var2) => var1 == var2, + Expression::Reference(var1) => match other { + Expression::Reference(var2) => var1 == var2, _ => false, }, Expression::FieldRef(_, exp1, field1) => match other { @@ -174,7 +120,7 @@ impl Expression { match self { Expression::Value(loc, _) => loc, Expression::Constructor(loc, _, _) => loc, - Expression::Reference(loc, _) => loc, + Expression::Reference(n) => n.location(), Expression::FieldRef(loc, _, _) => loc, Expression::Cast(loc, _, _) => loc, Expression::Primitive(loc, _) => loc, diff --git a/src/syntax/eval.rs b/src/syntax/eval.rs index ae45ed8..58ac657 100644 --- a/src/syntax/eval.rs +++ b/src/syntax/eval.rs @@ -1,5 +1,5 @@ use crate::eval::{EvalError, PrimitiveType, Value}; -use crate::syntax::{ConstantType, Expression, Name, Program, TopLevel}; +use crate::syntax::{ConstantType, Expression, Program, TopLevel}; use crate::util::scoped_map::ScopedMap; use internment::ArcIntern; use std::collections::HashMap; @@ -69,9 +69,9 @@ impl Expression { Ok(Value::Structure(Some(on.clone().intern()), map)) } - Expression::Reference(loc, n) => env - .get(&ArcIntern::new(n.clone())) - .ok_or_else(|| EvalError::LookupFailed(loc.clone(), n.clone())) + Expression::Reference(n) => env + .get(n.current_interned()) + .ok_or_else(|| EvalError::LookupFailed(n.location().clone(), n.current_name().to_string())) .cloned(), Expression::FieldRef(loc, expr, field) => { @@ -102,7 +102,7 @@ impl Expression { Ok(target_type.safe_cast(&value)?) } - Expression::Primitive(_, op) => Ok(Value::primitive(op.name.clone())), + Expression::Primitive(_, op) => Ok(Value::primitive(op.original_name().to_string())), Expression::Call(loc, fun, args) => { let function = fun.eval(stdout, env)?; @@ -129,8 +129,8 @@ impl Expression { } Value::Primitive(name) if name == "print" => { - if let [Expression::Reference(_, name)] = &args[..] { - let value = Expression::Reference(loc.clone(), name.clone()) + if let [Expression::Reference(name)] = &args[..] { + let value = Expression::Reference(name.clone()) .eval(stdout, env)?; let value = match value { Value::Number(x) => Value::U64(x), @@ -178,12 +178,12 @@ impl Expression { Expression::Function(_, name, arg_names, _, body) => { let result = Value::Closure( - name.clone().map(Name::intern), + name.as_ref().map(|n| n.current_interned().clone()), env.clone(), arg_names .iter() .cloned() - .map(|(x, _)| Name::intern(x)) + .map(|(x, _)| x.current_interned().clone()) .collect(), *body.clone(), ); diff --git a/src/syntax/name.rs b/src/syntax/name.rs new file mode 100644 index 0000000..5ffc5b5 --- /dev/null +++ b/src/syntax/name.rs @@ -0,0 +1,113 @@ +use crate::syntax::Location; +use internment::ArcIntern; +use std::fmt; +use std::hash::Hash; + +/// The name of a thing in the source language. +/// +/// In many ways, you can treat this like a string, but it's a very tricky +/// string in a couple of ways: +/// +/// First, it's a string associated with a particular location in the source +/// file, and you can find out what that source location is relatively easily. +/// +/// Second, it's a name that retains something of its identity across renaming, +/// so that you can keep track of what a variables original name was, as well as +/// what it's new name is if it's been renamed. +/// +/// Finally, when it comes to equality tests, comparisons, and hashing, `Name` +/// uses *only* the new name, if the variable has been renamed, or the original +/// name, if it has not been renamed. It never uses the location. This allows +/// relatively fast hashing and searching for things like binding sites, as the +/// value of the binding `Name` will be equal to the bound `Name`, even though +/// they occur at different locations. +#[derive(Clone, Debug)] +pub struct Name { + name: ArcIntern, + rename: Option>, + location: Location, +} + +impl Name { + /// Create a new name at the given location. + /// + /// This creates an "original" name, which has not been renamed, at the + /// given location. + pub fn new(n: S, location: Location) -> Name { + Name { + name: ArcIntern::new(n.to_string()), + rename: None, + location, + } + } + + /// Create a new name with no location information. + /// + /// This creates an "original" name, which has not been renamed, at the + /// given location. You should always prefer to use [`Location::new`] if + /// there is any possible way to get it, because that will be more + /// helpful to our users. + pub fn manufactured(n: S) -> Name { + Name { + name: ArcIntern::new(n.to_string()), + rename: None, + location: Location::manufactured(), + } + } + + /// Returns a reference to the original name of the variable. + /// + /// Regardless of whether or not the function has been renamed, this will + /// return whatever name this variable started with. + pub fn original_name(&self) -> &str { + self.name.as_str() + } + + /// Returns a reference to the current name of the variable. + /// + /// If the variable has been renamed, it will return that, otherwise we'll + /// return the current name. + pub fn current_name(&self) -> &str { + self.rename.as_ref().map(|x| x.as_str()).unwrap_or_else(|| self.name.as_str()) + } + + /// Returns the current name of the variable as an interned string. + pub fn current_interned(&self) -> &ArcIntern { + self.rename.as_ref().unwrap_or(&self.name) + } + + /// Return the location of this name. + pub fn location(&self) -> &Location { + &self.location + } + + /// Rename this variable to the given value + pub fn rename(&mut self, new_name: &ArcIntern) { + self.rename = Some(new_name.clone()); + } + + pub fn intern(&self) -> ArcIntern { + self.current_interned().clone() + } +} + +impl PartialEq for Name { + fn eq(&self, other: &Self) -> bool { + self.current_interned() == other.current_interned() + } +} + +impl Eq for Name {} + +impl Hash for Name { + fn hash(&self, state: &mut H) { + self.current_interned().hash(state) + } +} + +impl fmt::Display for Name { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.current_name().fmt(f) + } +} + diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 4522807..6c4d53d 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -9,7 +9,8 @@ //! eventually want to leave lalrpop behind.) //! use crate::syntax::{Location, ParserError}; -use crate::syntax::ast::{Program,TopLevel,Expression,Value,Name,Type}; +use crate::syntax::ast::{Program,TopLevel,Expression,Value,Type}; +use crate::syntax::name::Name; use crate::syntax::tokens::{ConstantType, Token}; use internment::ArcIntern; @@ -241,7 +242,7 @@ FieldExpression: Expression = { // they cannot be further divided into parts AtomicExpression: Expression = { // just a variable reference - "> => Expression::Reference(Location::new(file_idx, l..end), v.to_string()), + "> => Expression::Reference(Name::new(v.to_string(), Location::new(file_idx, l..end))), // just a number "> => Expression::Value(Location::new(file_idx, l..end), Value::Number(n.0, n.1, n.2)), // this expression could actually be a block! diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index a9caf3b..3ba412e 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -67,7 +67,7 @@ impl Expression { .nest(2) .braces(), ), - Expression::Reference(_, var) => allocator.text(var.to_string()), + Expression::Reference(var) => allocator.text(var.to_string()), Expression::FieldRef(_, val, field) => val .pretty(allocator) .append(allocator.text(".")) @@ -76,7 +76,7 @@ impl Expression { .text(t.clone()) .angles() .append(e.pretty(allocator)), - Expression::Primitive(_, op) => allocator.text(op.name.clone()), + Expression::Primitive(_, op) => allocator.text(op.original_name().to_string()), Expression::Call(_, fun, args) => { let args = args.iter().map(|x| x.pretty(allocator)); let comma_sepped_args = allocator.intersperse(args, allocator.text(",")); diff --git a/src/syntax/validate.rs b/src/syntax/validate.rs index afbba2c..4db79c8 100644 --- a/src/syntax/validate.rs +++ b/src/syntax/validate.rs @@ -141,9 +141,9 @@ impl Expression { (errors, warnings) } - Expression::Reference(_, var) if variable_map.contains_key(var) => (vec![], vec![]), - Expression::Reference(loc, var) => ( - vec![Error::UnboundVariable(loc.clone(), var.clone())], + Expression::Reference(var) if variable_map.contains_key(&var.original_name().to_string()) => (vec![], vec![]), + Expression::Reference(var) => ( + vec![Error::UnboundVariable(var.location().clone(), var.original_name().to_string())], vec![], ), Expression::FieldRef(_, exp, _) => exp.validate(variable_map), @@ -187,7 +187,7 @@ impl Expression { // immediately check the expression, and go from there. let (errors, mut warnings) = val.validate(variable_map); - if let Some(original_binding_site) = variable_map.get(&var.name) { + if let Some(original_binding_site) = variable_map.get(&var.original_name().to_string()) { warnings.push(Warning::ShadowedVariable( original_binding_site.clone(), loc.clone(), @@ -201,11 +201,11 @@ impl Expression { } Expression::Function(_, name, arguments, _, body) => { if let Some(name) = name { - variable_map.insert(name.name.clone(), name.location.clone()); + variable_map.insert(name.original_name().to_string(), name.location().clone()); } variable_map.new_scope(); for (arg, _) in arguments.iter() { - variable_map.insert(arg.name.clone(), arg.location.clone()); + variable_map.insert(arg.original_name().to_string(), arg.location().clone()); } let result = body.validate(variable_map); variable_map.release_scope(); diff --git a/src/type_infer/convert.rs b/src/type_infer/convert.rs index 8266eee..225f9db 100644 --- a/src/type_infer/convert.rs +++ b/src/type_infer/convert.rs @@ -191,20 +191,21 @@ impl InferenceEngine { } } - syntax::Expression::Reference(loc, name) => { - let iname = ArcIntern::new(name); - let final_name = renames.get(&iname).cloned().unwrap_or(iname); + syntax::Expression::Reference(mut name) => { + if let Some(rename) = renames.get(name.current_interned()) { + name.rename(rename); + } let result_type = self .variable_types - .get(&final_name) + .get(name.current_interned()) .cloned() .expect("variable bound before use"); let expression = ir::Expression::Atomic(ir::ValueOrRef::Ref( - loc, + name.location().clone(), result_type.clone(), - final_name.clone(), + name.current_interned().clone(), )); - let free_variables = HashSet::from([final_name]); + let free_variables = HashSet::from([name.current_interned().clone()]); ExpressionInfo { expression, @@ -260,7 +261,7 @@ impl InferenceEngine { } syntax::Expression::Primitive(loc, name) => { - let primop = ir::Primitive::from_str(&name.name).expect("valid primitive"); + let primop = ir::Primitive::from_str(&name.current_name()).expect("valid primitive"); match primop { ir::Primitive::Plus | ir::Primitive::Times | ir::Primitive::Divide => { @@ -404,12 +405,12 @@ impl InferenceEngine { expr_info } - syntax::Expression::Function(_, name, args, _, expr) => { + syntax::Expression::Function(loc, name, args, _, expr) => { // First, at some point we're going to want to know a location for this function, // which should either be the name if we have one, or the body if we don't. let function_location = match name { None => expr.location().clone(), - Some(ref name) => name.location.clone(), + Some(ref name) => loc, }; // Next, let us figure out what we're going to name this function. If the user // didn't provide one, we'll just call it "function:" for them. (We'll @@ -440,7 +441,7 @@ impl InferenceEngine { .map(|(name, mut declared_type)| { let new_type = ir::TypeOrVar::new(); self.constraints.push(Constraint::IsSomething( - name.location.clone(), + name.location().clone(), new_type.clone(), )); let new_name = self.finalize_name(renames, name.clone()); @@ -450,7 +451,7 @@ impl InferenceEngine { if let Some(declared_type) = declared_type.take() { let declared_type = self.convert_type(declared_type); self.constraints.push(Constraint::Equivalent( - name.location.clone(), + name.location().clone(), new_type.clone(), declared_type, )); @@ -495,11 +496,11 @@ impl InferenceEngine { fn convert_type(&mut self, ty: syntax::Type) -> ir::TypeOrVar { match ty { - syntax::Type::Named(x) => match PrimitiveType::from_str(x.name.as_str()) { + syntax::Type::Named(x) => match PrimitiveType::from_str(x.current_name()) { Err(_) => { - let retval = ir::TypeOrVar::new_located(x.location.clone()); + let retval = ir::TypeOrVar::new_located(x.location().clone()); self.constraints.push(Constraint::NamedTypeIs( - x.location.clone(), + x.location().clone(), x.intern(), retval.clone(), )); @@ -529,10 +530,10 @@ impl InferenceEngine { ) -> ArcIntern { if self .variable_types - .contains_key(&ArcIntern::new(name.name.clone())) + .contains_key(name.current_interned()) { - let new_name = ir::gensym(&name.name); - renames.insert(ArcIntern::new(name.name.to_string()), new_name.clone()); + let new_name = ir::gensym(&name.original_name()); + renames.insert(name.current_interned().clone(), new_name.clone()); new_name } else { ArcIntern::new(name.to_string())