diff --git a/src/repl.rs b/src/repl.rs index 5aa0c9b..197e246 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -134,10 +134,10 @@ impl REPL { // if this is a variable binding, and we've never defined this variable before, // we should tell cranelift about it. this is optimistic; if we fail to compile, // then we won't use this definition until someone tries again. - if !self.variable_binding_sites.contains_key(&name) { - self.jitter.define_string(&name)?; + if !self.variable_binding_sites.contains_key(&name.name) { + self.jitter.define_string(&name.name)?; self.jitter - .define_variable(name.clone(), ConstantType::U64)?; + .define_variable(name.to_string(), ConstantType::U64)?; } crate::syntax::Program { diff --git a/src/syntax.rs b/src/syntax.rs index 689c407..2fe4d13 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -73,12 +73,12 @@ pub enum ParserError { /// Raised when we're parsing the file, and run into a token in a /// place we weren't expecting it. #[error("Unrecognized token")] - UnrecognizedToken(Location, Location, Token, Vec), + UnrecognizedToken(Location, Token, Vec), /// Raised when we were expecting the end of the file, but instead /// got another token. #[error("Extra token")] - ExtraToken(Location, Token, Location), + ExtraToken(Location, Token), /// Raised when the lexer just had some sort of internal problem /// and just gave up. @@ -106,30 +106,28 @@ impl ParserError { fn convert(file_idx: usize, err: ParseError) -> Self { match err { ParseError::InvalidToken { location } => { - ParserError::InvalidToken(Location::new(file_idx, location)) + ParserError::InvalidToken(Location::new(file_idx, location..location+1)) } ParseError::UnrecognizedEof { location, expected } => { - ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected) + ParserError::UnrecognizedEOF(Location::new(file_idx, location..location+1), expected) } ParseError::UnrecognizedToken { token: (start, token, end), expected, } => ParserError::UnrecognizedToken( - Location::new(file_idx, start), - Location::new(file_idx, end), + Location::new(file_idx, start..end), token, expected, ), ParseError::ExtraToken { token: (start, token, end), } => ParserError::ExtraToken( - Location::new(file_idx, start), + Location::new(file_idx, start..end), token, - Location::new(file_idx, end), ), ParseError::User { error } => match error { LexerError::LexFailure(offset) => { - ParserError::LexFailure(Location::new(file_idx, offset)) + ParserError::LexFailure(Location::new(file_idx, offset..offset+1)) } }, } @@ -180,37 +178,29 @@ impl<'a> From<&'a ParserError> for Diagnostic { ), // encountered a token where it shouldn't be - ParserError::UnrecognizedToken(start, end, token, expected) => { + ParserError::UnrecognizedToken(loc, token, expected) => { let expected_str = format!("unexpected token {}{}", token, display_expected(expected)); let unexpected_str = format!("unexpected token {}", token); - let labels = start.range_label(end); Diagnostic::error() - .with_labels( - labels - .into_iter() - .map(|l| l.with_message(unexpected_str.clone())) - .collect(), - ) .with_message(expected_str) + .with_labels(vec![ + loc.primary_label().with_message(unexpected_str) + ]) } // I think we get this when we get a token, but were expected EOF - ParserError::ExtraToken(start, token, end) => { + ParserError::ExtraToken(loc, token) => { let expected_str = format!("unexpected token {} after the expected end of file", token); let unexpected_str = format!("unexpected token {}", token); - let labels = start.range_label(end); Diagnostic::error() - .with_labels( - labels - .into_iter() - .map(|l| l.with_message(unexpected_str.clone())) - .collect(), - ) .with_message(expected_str) + .with_labels(vec![ + loc.primary_label().with_message(unexpected_str) + ]) } // simple lexer errors @@ -293,23 +283,23 @@ fn order_of_operations() { Program::from_str(muladd1).unwrap(), Program { statements: vec![Statement::Binding( - Location::new(testfile, 0), - "x".to_string(), + Location::new(testfile, 0..1), + Name::manufactured("x"), Expression::Primitive( - Location::new(testfile, 6), + Location::new(testfile, 6..7), "+".to_string(), vec![ - Expression::Value(Location::new(testfile, 4), Value::Number(None, None, 1),), + Expression::Value(Location::new(testfile, 4..5), Value::Number(None, None, 1),), Expression::Primitive( - Location::new(testfile, 10), + Location::new(testfile, 10..11), "*".to_string(), vec![ Expression::Value( - Location::new(testfile, 8), + Location::new(testfile, 8..9), Value::Number(None, None, 2), ), Expression::Value( - Location::new(testfile, 12), + Location::new(testfile, 12..13), Value::Number(None, None, 3), ), ] diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs index 1fb2397..b78ca54 100644 --- a/src/syntax/arbitrary.rs +++ b/src/syntax/arbitrary.rs @@ -1,4 +1,4 @@ -use crate::syntax::ast::{ConstantType, Expression, Program, Statement, Value}; +use crate::syntax::ast::{ConstantType, Expression, Name, Program, Statement, Value}; use crate::syntax::location::Location; use proptest::sample::select; use proptest::{ @@ -10,15 +10,12 @@ use std::collections::HashMap; const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*"; const OPERATORS: &[(&str, usize)] = &[("+", 2), ("-", 1), ("-", 2), ("*", 2), ("/", 2)]; -#[derive(Clone, Debug)] -struct Name(String); - impl Arbitrary for Name { type Parameters = (); type Strategy = BoxedStrategy; fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { - VALID_VARIABLE_NAMES.prop_map(Name).boxed() + VALID_VARIABLE_NAMES.prop_map(Name::manufactured).boxed() } } @@ -67,12 +64,12 @@ impl Arbitrary for Program { output_type: Some(psi.binding_type), }); - defined_variables.insert(psi.name.0.clone(), psi.binding_type); + defined_variables.insert(psi.name.name.clone(), psi.binding_type); statements.push( expr.prop_map(move |expr| { Statement::Binding( Location::manufactured(), - psi.name.0.clone(), + psi.name.clone(), expr, ) }) @@ -81,7 +78,7 @@ impl Arbitrary for Program { } else { let printers = defined_variables .keys() - .map(|n| Just(Statement::Print(Location::manufactured(), n.clone()))); + .map(|n| Just(Statement::Print(Location::manufactured(), Name::manufactured(n)))); statements.push(Union::new(printers).boxed()); } } diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 5e356e5..64e8b22 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,3 +1,8 @@ +use std::fmt; +use std::hash::Hash; + +use internment::ArcIntern; + pub use crate::syntax::tokens::ConstantType; use crate::syntax::Location; @@ -14,6 +19,50 @@ pub struct Program { pub statements: Vec, } +/// A Name. +/// +/// This is basically a string, but annotated with the place the string +/// is in the source file. +#[derive(Clone, Debug)] +pub struct Name { + pub name: String, + pub location: Location, +} + +impl Name { + pub fn new(n: S, location: Location) -> Name { + Name{ name: n.to_string(), location } + } + + pub fn manufactured(n: S) -> Name { + Name{ name: n.to_string(), location: Location::manufactured() } + } + + pub fn intern(self) -> ArcIntern { + ArcIntern::new(self.name) + } +} + +impl PartialEq for Name { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl Eq for Name {} + +impl Hash for Name { + fn hash(&self, state: &mut H) { + self.name.hash(state) + } +} + +impl fmt::Display for Name { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.name.fmt(f) + } +} + /// A parsed statement. /// /// Statements are guaranteed to be syntactically valid, but may be @@ -27,8 +76,8 @@ pub struct Program { /// thing, not if they are the exact same statement. #[derive(Clone, Debug)] pub enum Statement { - Binding(Location, String, Expression), - Print(Location, String), + Binding(Location, Name, Expression), + Print(Location, Name), } impl PartialEq for Statement { diff --git a/src/syntax/eval.rs b/src/syntax/eval.rs index f952e45..d6fda74 100644 --- a/src/syntax/eval.rs +++ b/src/syntax/eval.rs @@ -25,11 +25,11 @@ impl Program { match stmt { Statement::Binding(_, name, value) => { let actual_value = value.eval(&env)?; - env = env.extend(ArcIntern::new(name.clone()), actual_value); + env = env.extend(name.clone().intern(), actual_value); } Statement::Print(_, name) => { - let value = env.lookup(ArcIntern::new(name.clone()))?; + let value = env.lookup(name.clone().intern())?; let line = format!("{} = {}\n", name, value); stdout.push_str(&line); } diff --git a/src/syntax/location.rs b/src/syntax/location.rs index d193d26..a4c100c 100644 --- a/src/syntax/location.rs +++ b/src/syntax/location.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use codespan_reporting::diagnostic::{Diagnostic, Label}; /// A source location, for use in pointing users towards warnings and errors. @@ -7,7 +9,7 @@ use codespan_reporting::diagnostic::{Diagnostic, Label}; #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub struct Location { file_idx: usize, - offset: usize, + location: Range, } impl Location { @@ -17,8 +19,8 @@ impl Location { /// The file index is based on the file database being used. See the /// `codespan_reporting::files::SimpleFiles::add` function, which is /// normally where we get this index. - pub fn new(file_idx: usize, offset: usize) -> Self { - Location { file_idx, offset } + pub fn new(file_idx: usize, location: Range) -> Self { + Location { file_idx, location } } /// Generate a `Location` for a completely manufactured bit of code. @@ -30,7 +32,7 @@ impl Location { pub fn manufactured() -> Self { Location { file_idx: 0, - offset: 0, + location: 0..0, } } @@ -47,7 +49,7 @@ impl Location { /// actually happened), but you'd probably want to make the first location /// the secondary label to help users find it. pub fn primary_label(&self) -> Label { - Label::primary(self.file_idx, self.offset..self.offset) + Label::primary(self.file_idx, self.location.clone()) } /// Generate a secondary label for a [`Diagnostic`], based on this source @@ -64,35 +66,7 @@ impl Location { /// probably want to make the first location the secondary label to help /// users find it. pub fn secondary_label(&self) -> Label { - Label::secondary(self.file_idx, self.offset..self.offset) - } - - /// Given this location and another, generate a primary label that - /// specifies the area between those two locations. - /// - /// See [`Self::primary_label`] for some discussion of primary versus - /// secondary labels. If the two locations are the same, this method does - /// the exact same thing as [`Self::primary_label`]. If this item was - /// generated by [`Self::manufactured`], it will act as if you'd called - /// `primary_label` on the argument. Otherwise, it will generate the obvious - /// span. - /// - /// This function will return `None` only in the case that you provide - /// labels from two different files, which it cannot sensibly handle. - pub fn range_label(&self, end: &Location) -> Option> { - if self.file_idx == 0 { - return Some(end.primary_label()); - } - - if self.file_idx != end.file_idx { - return None; - } - - if self.offset > end.offset { - Some(Label::primary(self.file_idx, end.offset..self.offset)) - } else { - Some(Label::primary(self.file_idx, self.offset..end.offset)) - } + Label::secondary(self.file_idx, self.location.clone()) } /// Return an error diagnostic centered at this location. @@ -104,7 +78,7 @@ impl Location { pub fn error(&self) -> Diagnostic { Diagnostic::error().with_labels(vec![Label::primary( self.file_idx, - self.offset..self.offset, + self.location.clone(), )]) } @@ -117,8 +91,23 @@ impl Location { pub fn labelled_error(&self, msg: &str) -> Diagnostic { Diagnostic::error().with_labels(vec![Label::primary( self.file_idx, - self.offset..self.offset, + self.location.clone(), ) .with_message(msg)]) } + + /// Merge two locations into a single location spanning the whole range between + /// them. + /// + /// This function returns None if the locations are from different files; this + /// can happen if one of the locations is manufactured, for example. + pub fn merge(&self, other: &Self) -> Option { + if self.file_idx != other.file_idx { + None + } else { + let start = if self.location.start <= other.location.start { self.location.start } else { other.location.start }; + let end = if self.location.end >= other.location.end { self.location.end } else { other.location.end }; + Some(Location { file_idx: self.file_idx, location: start..end }) + } + } } diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 3cedfc2..fc6a0c6 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -9,7 +9,7 @@ //! eventually want to leave lalrpop behind.) //! use crate::syntax::{LexerError, Location}; -use crate::syntax::ast::{Program,Statement,Expression,Value}; +use crate::syntax::ast::{Program,Statement,Expression,Value,Name}; use crate::syntax::tokens::{ConstantType, Token}; use internment::ArcIntern; @@ -91,10 +91,19 @@ pub Statement: Statement = { // A statement can be a variable binding. Note, here, that we use this // funny @L thing to get the source location before the variable, so that // we can say that this statement spans across everything. - "> "=" ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e), + "> "=" ";" => + Statement::Binding( + Location::new(file_idx, ls..le), + Name::new(v, Location::new(file_idx, ls..var_end)), + e, + ), // Alternatively, a statement can just be a print statement. - "print" "> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()), + "print" "> ";" => + Statement::Print( + Location::new(file_idx, ls..le), + Name::new(v, Location::new(file_idx, name_start..name_end)), + ), } // Expressions! Expressions are a little fiddly, because we're going to @@ -126,21 +135,27 @@ Expression: Expression = { // we group addition and subtraction under the heading "additive" AdditiveExpression: Expression = { - "+" => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]), - "-" => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]), + "+" => + Expression::Primitive(Location::new(file_idx, ls..le), "+".to_string(), vec![e1, e2]), + "-" => + Expression::Primitive(Location::new(file_idx, ls..le), "-".to_string(), vec![e1, e2]), MultiplicativeExpression, } // similarly, we group multiplication and division under "multiplicative" MultiplicativeExpression: Expression = { - "*" => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]), - "/" => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]), + "*" => + Expression::Primitive(Location::new(file_idx, ls..le), "*".to_string(), vec![e1, e2]), + "/" => + Expression::Primitive(Location::new(file_idx, ls..le), "/".to_string(), vec![e1, e2]), UnaryExpression, } UnaryExpression: Expression = { - "-" => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e]), - "<" "> ">" => Expression::Cast(Location::new(file_idx, l), v.to_string(), Box::new(e)), + "-" => + Expression::Primitive(Location::new(file_idx, l..le), "-".to_string(), vec![e]), + "<" "> ">" => + Expression::Cast(Location::new(file_idx, l..le), v.to_string(), Box::new(e)), AtomicExpression, } @@ -148,9 +163,9 @@ UnaryExpression: Expression = { // they cannot be further divided into parts AtomicExpression: Expression = { // just a variable reference - "> => Expression::Reference(Location::new(file_idx, l), v.to_string()), + "> => Expression::Reference(Location::new(file_idx, l..end), v.to_string()), // just a number - "> => Expression::Value(Location::new(file_idx, l), Value::Number(n.0, n.1, n.2)), + "> => Expression::Value(Location::new(file_idx, l..end), Value::Number(n.0, n.1, n.2)), // finally, let people parenthesize expressions and get back to a // lower precedence "(" ")" => e, diff --git a/src/syntax/validate.rs b/src/syntax/validate.rs index a3589f6..30afe5c 100644 --- a/src/syntax/validate.rs +++ b/src/syntax/validate.rs @@ -117,20 +117,20 @@ impl Statement { errors.append(&mut exp_errors); warnings.append(&mut exp_warnings); - if let Some(original_binding_site) = bound_variables.get(var) { + if let Some(original_binding_site) = bound_variables.get(&var.name) { warnings.push(Warning::ShadowedVariable( original_binding_site.clone(), loc.clone(), - var.clone(), + var.to_string(), )); } else { - bound_variables.insert(var.clone(), loc.clone()); + bound_variables.insert(var.to_string(), loc.clone()); } } - Statement::Print(_, var) if bound_variables.contains_key(var) => {} + Statement::Print(_, var) if bound_variables.contains_key(&var.name) => {} Statement::Print(loc, var) => { - errors.push(Error::UnboundVariable(loc.clone(), var.clone())) + errors.push(Error::UnboundVariable(loc.clone(), var.to_string())) } } diff --git a/src/type_infer/convert.rs b/src/type_infer/convert.rs index 5806153..3db2c59 100644 --- a/src/type_infer/convert.rs +++ b/src/type_infer/convert.rs @@ -53,7 +53,7 @@ fn convert_statement( ) -> Vec { match statement { syntax::Statement::Print(loc, name) => { - let iname = ArcIntern::new(name); + let iname = ArcIntern::new(name.to_string()); let final_name = renames .get(&iname) .map(Clone::clone) @@ -71,7 +71,7 @@ fn convert_statement( syntax::Statement::Binding(loc, name, expr) => { let (mut prereqs, expr, ty) = convert_expression(expr, constraint_db, renames, bindings); - let iname = ArcIntern::new(name); + let iname = ArcIntern::new(name.to_string()); let final_name = if bindings.contains_key(&iname) { let new_name = ir::gensym(iname.as_str()); renames.insert(iname, new_name.clone());