From 2e82fcf343067fe10ae1cf780a347da9a8badc86 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Thu, 12 Jan 2023 18:06:06 -0800 Subject: [PATCH] Clean up location handling, which wsa kind of a pain. --- src/asts/lil.rs | 10 ++- src/bin.rs | 2 +- src/errors.rs | 129 ++++++++---------------------------- src/passes.rs | 32 ++++++++- src/passes/into_crane.rs | 8 ++- src/passes/syntax_to_hil.rs | 4 +- src/runtime.rs | 2 +- src/syntax.rs | 71 ++++++++++++++++---- src/syntax/ast.rs | 2 +- src/syntax/location.rs | 56 ++++++++++++++++ src/syntax/parser.lalrpop | 22 +++--- src/syntax/token_stream.rs | 12 ---- src/syntax/tokens.rs | 7 ++ src/variable_map.rs | 2 +- src/warnings.rs | 37 +++-------- 15 files changed, 217 insertions(+), 179 deletions(-) create mode 100644 src/syntax/location.rs diff --git a/src/asts/lil.rs b/src/asts/lil.rs index 9564f58..8aab427 100644 --- a/src/asts/lil.rs +++ b/src/asts/lil.rs @@ -68,12 +68,10 @@ impl Statement { .append(prim.pretty(variable_map, allocator)) } - Statement::Print(_, var, _val) => { - allocator - .text("print") - .append(allocator.space()) - .append(allocator.text(var.to_string())) - } + Statement::Print(_, var, _val) => allocator + .text("print") + .append(allocator.space()) + .append(allocator.text(var.to_string())), } } } diff --git a/src/bin.rs b/src/bin.rs index 6717e3a..8cc901c 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -96,7 +96,7 @@ fn main() -> Result<(), MainError> { .unwrap(); let platform = Triple::host(); - let isa_builder= isa::lookup(platform.clone())?; + let isa_builder = isa::lookup(platform.clone())?; let mut settings_builder = settings::builder(); settings_builder.set("is_pic", "true")?; let isa = isa_builder.finish(settings::Flags::new(settings_builder))?; diff --git a/src/errors.rs b/src/errors.rs index ff4cd03..6b57fc9 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,46 +1,18 @@ -use crate::syntax::{LexerError, Location, Token}; -use codespan_reporting::diagnostic::{Diagnostic, Label}; +use crate::syntax::{Location, ParserError}; +use codespan_reporting::diagnostic::Diagnostic; use codespan_reporting::files; -use lalrpop_util::ParseError; use std::io; -use thiserror::Error; -#[derive(Debug, Error)] +#[derive(Debug)] pub enum Error { - #[error("IO failure: {0}")] - IOError(#[from] io::Error), - - #[error("Internal file database error: {0}")] - InternalFileDBError(#[from] files::Error), - - #[error("Error in parser: {0}")] - ParserError(#[from] ParseError), - - #[error("Internal error: Couldn't deal with bound variable with no bindiing site ({0})")] + IOError(io::Error), + InternalFileDBError(files::Error), + ParserError(ParserError), BindingSiteFailure(Location, String), - - #[error("Unbound variable '{0}'")] UnboundVariable(Location, String), - - #[error("Internal error: {0}")] InternalError(Location, String), } -fn locations_to_labels(start: &Location, end: &Location) -> Vec> { - match start { - Location::Manufactured => match end { - Location::Manufactured => vec![], - Location::InFile(file_id, off) => vec![Label::primary(*file_id, *off..*off)], - }, - Location::InFile(file_id1, start) => match end { - Location::InFile(file_id2, end) if file_id1 == file_id2 => { - vec![Label::primary(*file_id1, *start..*end)] - } - _ => vec![Label::primary(*file_id1, *start..*start)], - }, - } -} - fn display_expected(expected: &[String]) -> String { match expected.len() { 0 => "".to_string(), @@ -74,37 +46,21 @@ impl From for Diagnostic { Error::ParserError(pe) => match pe { // this was just a token we didn't understand - ParseError::InvalidToken { location } => match location { - Location::Manufactured => Diagnostic::error().with_message( - "encountered extremely confusing token (in generated data?!)", - ), - Location::InFile(file_id, off) => Diagnostic::error() - .with_message("encountered extremely confusing token") - .with_labels(vec![Label::primary(*file_id, *off..*off) - .with_message("extremely odd token")]), - }, + ParserError::InvalidToken(location) => location + .labelled_error("extremely odd token") + .with_message("encountered extremely confusing token"), // unexpected EOF! - ParseError::UnrecognizedEOF { location, expected } => match location { - Location::Manufactured => Diagnostic::error().with_message(format!( - "unexpected end of file{}", - display_expected(expected) - )), - Location::InFile(file_id, off) => Diagnostic::error() - .with_message(format!( - "unexpected enf of file{}", - display_expected(expected) - )) - .with_labels(vec![Label::primary(*file_id, *off..*off)]), - }, + ParserError::UnrecognizedEOF(location, expected) => location.error().with_message( + format!("expected enf of file{}", display_expected(expected)), + ), // encountered a token where it shouldn't be - ParseError::UnrecognizedToken { token, expected } => { - let (start, token, end) = token; + ParserError::UnrecognizedToken(start, end, token, expected) => { let expected_str = format!("unexpected token {}{}", token, display_expected(expected)); let unexpected_str = format!("unexpected token {}", token); - let mut labels = locations_to_labels(start, end); + let mut labels = start.range_label(end); Diagnostic::error() .with_labels( @@ -117,12 +73,11 @@ impl From for Diagnostic { } // I think we get this when we get a token, but were expected EOF - ParseError::ExtraToken { token } => { - let (start, token, end) = token; + ParserError::ExtraToken(start, token, end) => { let expected_str = format!("unexpected token {} after the expected end of file", token); let unexpected_str = format!("unexpected token {}", token); - let mut labels = locations_to_labels(start, end); + let mut labels = start.range_label(end); Diagnostic::error() .with_labels( @@ -135,53 +90,25 @@ impl From for Diagnostic { } // simple lexer errors - ParseError::User { error } => match error { - LexerError::LexFailure(location) => match location { - Location::Manufactured => Diagnostic::error() - .with_message("unexpected character encountered in manufactured code?"), - Location::InFile(file_id, offset) => Diagnostic::error() - .with_labels(vec![Label::primary(*file_id, *offset..*offset) - .with_message("unexpected character")]), - }, - }, + ParserError::LexFailure(location) => { + location.error().with_message("unexpected character") + } }, - Error::BindingSiteFailure(location, name) => match location { - Location::Manufactured => Diagnostic::error().with_message(format!( + Error::BindingSiteFailure(location, name) => location + .labelled_error("discovered here") + .with_message(format!( "Internal Error: Lost binding site for bound variable {}", name )), - Location::InFile(file_id, offset) => Diagnostic::error() - .with_labels(vec![ - Label::primary(*file_id, *offset..*offset).with_message("discovered here") - ]) - .with_message(format!( - "Internal Error: Lost binding site for bound variable {}", - name - )), - }, - Error::UnboundVariable(location, name) => match location { - Location::Manufactured => { - Diagnostic::error().with_message(format!("Unbound variable '{}'", name)) - } - Location::InFile(file_id, offset) => Diagnostic::error() - .with_labels(vec![ - Label::primary(*file_id, *offset..*offset).with_message("unbound here") - ]) - .with_message(format!("Unbound variable '{}'", name)), - }, + Error::UnboundVariable(location, name) => location + .labelled_error("unbound here") + .with_message(format!("Unbound variable '{}'", name)), - Error::InternalError(location, string) => match location { - Location::Manufactured => { - Diagnostic::error().with_message(format!("Internal error: {}", string)) - } - Location::InFile(file_id, offset) => Diagnostic::error() - .with_labels(vec![ - Label::primary(*file_id, *offset..*offset).with_message("this is related") - ]) - .with_message(format!("Internal error: {}", string)), - }, + Error::InternalError(location, string) => location + .labelled_error("this is related") + .with_message(format!("Internal error: {}", string)), } } } diff --git a/src/passes.rs b/src/passes.rs index 011956f..00b92ef 100644 --- a/src/passes.rs +++ b/src/passes.rs @@ -1,7 +1,7 @@ use crate::asts::hil; use crate::errors::Error; -use crate::syntax; use crate::syntax::Location; +use crate::syntax::{self, ParserError}; use crate::variable_map::VariableMap; use crate::warnings::Warning; use codespan_reporting::files::SimpleFiles; @@ -19,6 +19,36 @@ pub struct PassResult { pub errors: Vec, } +impl From for PassResult> { + fn from(value: ParserError) -> Self { + PassResult { + result: None, + warnings: vec![], + errors: vec![Error::ParserError(value)], + } + } +} + +impl From for PassResult> { + fn from(value: std::io::Error) -> Self { + PassResult { + result: None, + warnings: vec![], + errors: vec![Error::IOError(value)], + } + } +} + +impl From for PassResult> { + fn from(value: codespan_reporting::files::Error) -> Self { + PassResult { + result: None, + warnings: vec![], + errors: vec![Error::InternalFileDBError(value)], + } + } +} + impl From for PassResult> where Error: From, diff --git a/src/passes/into_crane.rs b/src/passes/into_crane.rs index f9f215a..e9a5949 100644 --- a/src/passes/into_crane.rs +++ b/src/passes/into_crane.rs @@ -47,7 +47,13 @@ impl Program { )?; let mut data_context = DataContext::new(); data_context.set_align(8); - data_context.define(interned_value.as_str().to_owned().into_boxed_str().into_boxed_bytes()); + data_context.define( + interned_value + .as_str() + .to_owned() + .into_boxed_str() + .into_boxed_bytes(), + ); module.define_data(global_id, &data_context)?; let local_data = module.declare_data_in_func(global_id, &mut ctx.func); variable_name_global_values.insert(interned_value, local_data); diff --git a/src/passes/syntax_to_hil.rs b/src/passes/syntax_to_hil.rs index 6266423..45e005a 100644 --- a/src/passes/syntax_to_hil.rs +++ b/src/passes/syntax_to_hil.rs @@ -72,7 +72,7 @@ impl hil::Statement { syntax::Statement::Print(variable_loc, variable_name) => { match var_map.get_variable(&variable_name) { None => PassResult { - result: hil::Statement::Print(Location::Manufactured, 0), + result: hil::Statement::Print(Location::manufactured(), 0), warnings: vec![], errors: vec![Error::UnboundVariable(variable_loc, variable_name)], }, @@ -102,7 +102,7 @@ impl hil::Expression { syntax::Expression::Reference(location, name) => match var_map.get_variable(&name) { None => PassResult { - result: hil::Expression::Reference(Location::Manufactured, 0), + result: hil::Expression::Reference(Location::manufactured(), 0), warnings: vec![], errors: vec![Error::UnboundVariable(location, name)], }, diff --git a/src/runtime.rs b/src/runtime.rs index bcece5a..36d8ae6 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -1,8 +1,8 @@ use cranelift_codegen::ir::{types, AbiParam, FuncRef, Function, Signature}; use cranelift_codegen::isa::CallConv; use cranelift_module::{FuncId, Linkage, Module, ModuleResult}; -use target_lexicon::Triple; use std::collections::HashMap; +use target_lexicon::Triple; use thiserror::Error; pub struct RuntimeFunctions { diff --git a/src/syntax.rs b/src/syntax.rs index cd92691..9498118 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,6 +1,7 @@ use lalrpop_util::lalrpop_mod; +use logos::Logos; -mod token_stream; +mod location; mod tokens; lalrpop_mod!( #[allow(clippy::just_underscores_and_digits)] @@ -10,20 +11,64 @@ lalrpop_mod!( mod ast; pub use crate::syntax::ast::*; +pub use crate::syntax::location::Location; use crate::syntax::parser::ProgramParser; -use crate::syntax::token_stream::TokenStream; -pub use crate::syntax::token_stream::{LexerError, Location}; -pub use crate::syntax::tokens::Token; +pub use crate::syntax::tokens::{LexerError, Token}; use lalrpop_util::ParseError; #[cfg(test)] use std::str::FromStr; -type ParserError = ParseError; +#[derive(Debug)] +pub enum ParserError { + InvalidToken(Location), + UnrecognizedEOF(Location, Vec), + UnrecognizedToken(Location, Location, Token, Vec), + ExtraToken(Location, Token, Location), + LexFailure(Location), +} + +impl ParserError { + fn convert(file_idx: usize, err: ParseError) -> Self { + match err { + ParseError::InvalidToken { location } => { + ParserError::InvalidToken(Location::new(file_idx, location)) + } + ParseError::UnrecognizedEOF { location, expected } => { + ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected) + } + ParseError::UnrecognizedToken { + token: (start, token, end), + expected, + } => ParserError::UnrecognizedToken( + Location::new(file_idx, start), + Location::new(file_idx, end), + token, + expected, + ), + ParseError::ExtraToken { + token: (start, token, end), + } => ParserError::ExtraToken( + Location::new(file_idx, start), + token, + Location::new(file_idx, end), + ), + ParseError::User { error } => match error { + LexerError::LexFailure(offset) => { + ParserError::LexFailure(Location::new(file_idx, offset)) + } + }, + } + } +} impl Program { pub fn parse(file_idx: usize, buffer: &str) -> Result { - let lexer = TokenStream::new(file_idx, buffer); - ProgramParser::new().parse(lexer) + let lexer = Token::lexer(buffer) + .spanned() + .map(|(token, range)| (range.start, token, range.end)); + ProgramParser::new() + .parse(file_idx, lexer) + .map_err(|e| ParserError::convert(file_idx, e)) } } @@ -44,23 +89,23 @@ fn order_of_operations() { Program::from_str(muladd1).unwrap(), Program { statements: vec![Statement::Binding( - Location::InFile(testfile, 0), + Location::new(testfile, 0), "x".to_string(), Expression::Primitive( - Location::InFile(testfile, 6), + Location::new(testfile, 6), "+".to_string(), vec![ - Expression::Value(Location::InFile(testfile, 4), Value::Number(None, 1)), + Expression::Value(Location::new(testfile, 4), Value::Number(None, 1)), Expression::Primitive( - Location::InFile(testfile, 10), + Location::new(testfile, 10), "*".to_string(), vec![ Expression::Value( - Location::InFile(testfile, 8), + Location::new(testfile, 8), Value::Number(None, 2), ), Expression::Value( - Location::InFile(testfile, 12), + Location::new(testfile, 12), Value::Number(None, 3), ), ] diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index aa79518..76bb011 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,4 +1,4 @@ -use crate::syntax::token_stream::Location; +use crate::syntax::Location; use pretty::{DocAllocator, DocBuilder, Pretty}; static BINARY_OPERATORS: &[&str] = &["+", "-", "*", "/"]; diff --git a/src/syntax/location.rs b/src/syntax/location.rs new file mode 100644 index 0000000..65e1402 --- /dev/null +++ b/src/syntax/location.rs @@ -0,0 +1,56 @@ +use codespan_reporting::diagnostic::{Diagnostic, Label}; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Location { + file_idx: usize, + offset: usize, +} + +impl Location { + pub fn new(file_idx: usize, offset: usize) -> Self { + Location { file_idx, offset } + } + + pub fn manufactured() -> Self { + Location { + file_idx: 0, + offset: 0, + } + } + + pub fn primary_label(&self) -> Label { + Label::primary(self.file_idx, self.offset..self.offset) + } + + pub fn secondary_label(&self) -> Label { + Label::secondary(self.file_idx, self.offset..self.offset) + } + + pub fn range_label(&self, end: &Location) -> Vec> { + if self.file_idx == end.file_idx { + vec![Label::primary(self.file_idx, self.offset..end.offset)] + } else if self.file_idx == 0 { + // if this is a manufactured item, then ... just try the other one + vec![Label::primary(end.file_idx, end.offset..end.offset)] + } else { + // we'll just pick the first location if this is in two different + // files + vec![Label::primary(self.file_idx, self.offset..self.offset)] + } + } + + pub fn error(&self) -> Diagnostic { + Diagnostic::error().with_labels(vec![Label::primary( + self.file_idx, + self.offset..self.offset, + )]) + } + + pub fn labelled_error(&self, msg: &str) -> Diagnostic { + Diagnostic::error().with_labels(vec![Label::primary( + self.file_idx, + self.offset..self.offset, + ) + .with_message(msg)]) + } +} diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 4257d57..feefaad 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -1,12 +1,12 @@ +use crate::syntax::{LexerError, Location}; use crate::syntax::ast::{Program,Statement,Expression,Value}; use crate::syntax::tokens::Token; -use crate::syntax::token_stream::{LexerError, Location}; use internment::ArcIntern; -grammar; +grammar(file_idx: usize); extern { - type Location = Location; + type Location = usize; type Error = LexerError; enum Token { @@ -42,8 +42,8 @@ Statements: Vec = { } Statement: Statement = { - "> "=" ";" => Statement::Binding(l, v.to_string(), e), - "print" "> ";" => Statement::Print(l, v.to_string()), + "> "=" ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e), + "print" "> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()), } Expression: Expression = { @@ -51,21 +51,21 @@ Expression: Expression = { } AdditiveExpression: Expression = { - "+" => Expression::Primitive(l, "+".to_string(), vec![e1, e2]), - "-" => Expression::Primitive(l, "-".to_string(), vec![e1, e2]), + "+" => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]), + "-" => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]), MultiplicativeExpression, } MultiplicativeExpression: Expression = { - "*" => Expression::Primitive(l, "*".to_string(), vec![e1, e2]), - "/" => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), + "*" => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]), + "/" => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]), AtomicExpression, } AtomicExpression: Expression = { - "> => Expression::Reference(l, v.to_string()), + "> => Expression::Reference(Location::new(file_idx, l), v.to_string()), "> => { let val = Value::Number(n.0, n.1); - Expression::Value(l, val) + Expression::Value(Location::new(file_idx, l), val) } } \ No newline at end of file diff --git a/src/syntax/token_stream.rs b/src/syntax/token_stream.rs index 42f3dc3..8743bb3 100644 --- a/src/syntax/token_stream.rs +++ b/src/syntax/token_stream.rs @@ -44,18 +44,6 @@ impl Default for Location { } } -#[derive(Debug, Error, PartialEq, Eq)] -pub enum LexerError { - #[error("Failed lexing at {0}")] - LexFailure(Location), -} - -impl LexerError { - fn new(file_idx: usize, offset: usize) -> LexerError { - LexerError::LexFailure(Location::new(file_idx, offset)) - } -} - type LocatedToken = Result<(Location, Token, Location), LexerError>; impl<'s> Iterator for TokenStream<'s> { diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 3120c63..5332ce9 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -2,6 +2,7 @@ use internment::ArcIntern; use logos::{Lexer, Logos}; use std::fmt; use std::num::ParseIntError; +use thiserror::Error; #[derive(Logos, Clone, Debug, PartialEq, Eq)] pub enum Token { @@ -54,6 +55,12 @@ impl fmt::Display for Token { } } +#[derive(Debug, Error, PartialEq, Eq)] +pub enum LexerError { + #[error("Failed lexing at {0}")] + LexFailure(usize), +} + #[cfg(test)] impl Token { pub(crate) fn var(s: &str) -> Token { diff --git a/src/variable_map.rs b/src/variable_map.rs index 5c7f33c..5331c73 100644 --- a/src/variable_map.rs +++ b/src/variable_map.rs @@ -64,7 +64,7 @@ impl VariableMap { self.next_index += 1; self.map.insert( result, - VariableInfo::new(format!("", result), Location::Manufactured), + VariableInfo::new(format!("", result), Location::manufactured()), ); result diff --git a/src/warnings.rs b/src/warnings.rs index ea63d08..bb14086 100644 --- a/src/warnings.rs +++ b/src/warnings.rs @@ -1,5 +1,5 @@ use crate::syntax::Location; -use codespan_reporting::diagnostic::{Diagnostic, Label}; +use codespan_reporting::diagnostic::Diagnostic; #[derive(Debug, PartialEq, Eq)] pub enum Warning { @@ -9,33 +9,14 @@ pub enum Warning { impl From for Diagnostic { fn from(x: Warning) -> Self { match &x { - Warning::ShadowedVariable(original, new, name) => match original { - Location::Manufactured => match new { - Location::Manufactured => Diagnostic::warning() - .with_message(format!("Variable '{}' is rebound", name)), - Location::InFile(file_id, offset) => Diagnostic::warning() - .with_labels(vec![Label::primary(*file_id, *offset..*offset) - .with_message("variable rebound here")]) - .with_message(format!("Variable '{}' is rebound", name)), - }, - Location::InFile(orig_file_id, orig_offset) => match new { - Location::Manufactured => Diagnostic::warning() - .with_labels(vec![Label::primary( - *orig_file_id, - *orig_offset..*orig_offset, - ) - .with_message("original binding site")]) - .with_message(format!("Variable '{}' is rebound", name)), - Location::InFile(new_file_id, new_offset) => Diagnostic::warning() - .with_labels(vec![ - Label::primary(*new_file_id, *new_offset..*new_offset) - .with_message("variable rebound here"), - Label::secondary(*orig_file_id, *orig_offset..*orig_offset) - .with_message("original binding site"), - ]) - .with_message(format!("Variable '{}' is rebound", name)), - }, - }, + Warning::ShadowedVariable(original, new, name) => Diagnostic::warning() + .with_labels(vec![ + new.primary_label().with_message("variable rebound here"), + original + .secondary_label() + .with_message("original binding site"), + ]) + .with_message(format!("Variable '{}' is rebound", name)), } } }