diff --git a/Cargo.toml b/Cargo.toml index 6cb39f0..40eea36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "ngr" version = "0.1.0" authors = ["awick"] -edition = "2018" +edition = "2021" [lib] name = "ngr" @@ -14,6 +14,8 @@ path = "src/bin.rs" [dependencies] clap = { version = "^3.0.14", features = ["derive"] } +codespan = "0.11.1" +codespan-reporting = "0.11.1" lalrpop-util = "^0.19.7" lazy_static = "^1.4.0" logos = "^0.12.0" diff --git a/src/bin.rs b/src/bin.rs index d475914..36cc1e5 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,5 +1,11 @@ use clap::Parser; -use ngr::syntax::{ParserError, Program}; +use codespan_reporting::diagnostic::Diagnostic; +use codespan_reporting::files::SimpleFiles; +use codespan_reporting::term; +use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; +use ngr::error::Error; +use ngr::syntax::Program; +use std::fs; #[derive(Parser, Debug)] #[clap(author, version, about, long_about = None)] @@ -9,21 +15,30 @@ struct CommandLineArguments { output: Option, /// The file to parse - file: String + file: String, } -fn real_main() -> Result<(), ParserError> { - let args = CommandLineArguments::parse(); - - let program = Program::from_file(&args.file)?; - println!("args: {:?}", args); - println!("program: {:?}", program); - - Ok(()) +fn compile_file( + file_database: &mut SimpleFiles, + initial_file_name: &str, +) -> Result { + let initial_file_contents = fs::read_to_string(initial_file_name)?; + let initial_file = file_database.add(initial_file_name.to_string(), initial_file_contents); + let db_version = file_database.get(initial_file)?; + let db_version_source = db_version.source(); + Ok(Program::parse(initial_file, db_version_source)?) 
} fn main() { - if let Err(e) = real_main() { - println!("{}", e); + let args = CommandLineArguments::parse(); + let mut file_database = SimpleFiles::new(); + let initial_file_name = &args.file; + + if let Err(e) = compile_file(&mut file_database, initial_file_name) { + let diagnostic = Diagnostic::from(e); + let writer = StandardStream::stderr(ColorChoice::Auto); + let config = codespan_reporting::term::Config::default(); + + term::emit(&mut writer.lock(), &config, &file_database, &diagnostic).unwrap(); } } diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..eee0d05 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,141 @@ +use crate::syntax::{LexerError, Location, Token}; +use codespan_reporting::diagnostic::{Diagnostic, Label}; +use codespan_reporting::files; +use lalrpop_util::ParseError; +use std::io; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("IO failure: {0}")] + IOError(#[from] io::Error), + + #[error("Internal file database error: {0}")] + InternalFileDBError(#[from] files::Error), + + #[error("Error in parser: {0}")] + ParserError(#[from] ParseError), +} + +fn locations_to_labels(start: &Location, end: &Location) -> Vec> { + match start { + Location::Manufactured => match end { + Location::Manufactured => vec![], + Location::InFile(file_id, off) => vec![Label::primary(*file_id, *off..*off)], + }, + Location::InFile(file_id1, start) => match end { + Location::InFile(file_id2, end) if file_id1 == file_id2 => { + vec![Label::primary(*file_id1, *start..*end)] + } + _ => vec![Label::primary(*file_id1, *start..*start)], + }, + } +} + +fn display_expected(expected: &[String]) -> String { + match expected.len() { + 0 => "".to_string(), + 1 => format!("; expected {}", expected[0]), + 2 => format!("; expected {} or {}", expected[0], expected[1]), + n => format!( + "; expected {}or {}", + comma_separate(&expected[0..n - 1]), + expected[n - 1] + ), + } +} + +fn comma_separate(strings: &[String]) -> String { + let 
mut result = String::new(); + + for s in strings.iter() { + result.push_str(s); + result.push_str(", "); + } + + result +} + +impl From for Diagnostic { + fn from(x: Error) -> Self { + match &x { + Error::IOError(e) => Diagnostic::error().with_message(format!("{}", e)), + + Error::InternalFileDBError(e) => Diagnostic::error().with_message(format!("{}", e)), + + Error::ParserError(pe) => match pe { + // this was just a token we didn't understand + ParseError::InvalidToken { location } => match location { + Location::Manufactured => Diagnostic::error().with_message( + "encountered extremely confusing token (in generated data?!)", + ), + Location::InFile(file_id, off) => Diagnostic::error() + .with_message("encountered extremely confusing token") + .with_labels(vec![Label::primary(*file_id, *off..*off) + .with_message("extremely odd token")]), + }, + + // unexpected EOF! + ParseError::UnrecognizedEOF { location, expected } => match location { + Location::Manufactured => Diagnostic::error().with_message(format!( + "unexpected end of file{}", + display_expected(expected) + )), + Location::InFile(file_id, off) => Diagnostic::error() + .with_message(format!( + "unexpected end of file{}", + display_expected(expected) + )) + .with_labels(vec![Label::primary(*file_id, *off..*off)]), + }, + + // encountered a token where it shouldn't be + ParseError::UnrecognizedToken { token, expected } => { + let (start, token, end) = token; + let expected_str = + format!("unexpected token {}{}", token, display_expected(expected)); + let unexpected_str = format!("unexpected token {}", token); + let mut labels = locations_to_labels(start, end); + + Diagnostic::error() + .with_labels( + labels + .drain(..)
+ .map(|l| l.with_message(unexpected_str.clone())) + .collect(), + ) + .with_message(expected_str) + } + + // I think we get this when we get a token, but were expecting EOF + ParseError::ExtraToken { token } => { + let (start, token, end) = token; + let expected_str = + format!("unexpected token {} after the expected end of file", token); + let unexpected_str = format!("unexpected token {}", token); + let mut labels = locations_to_labels(start, end); + + Diagnostic::error() + .with_labels( + labels + .drain(..) + .map(|l| l.with_message(unexpected_str.clone())) + .collect(), + ) + .with_message(expected_str) + } + + // simple lexer errors + ParseError::User { error } => match error { + LexerError::LexFailure(location) => match location { + Location::Manufactured => Diagnostic::error() + .with_message("unexpected character encountered in manufactured code?"), + Location::InFile(file_id, offset) => Diagnostic::error() + .with_labels(vec![Label::primary(*file_id, *offset..*offset) + .with_message("unexpected character")]), + }, + }, + }, + } + } +} diff --git a/src/lib.rs b/src/lib.rs index ff3ff0a..4c51213 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,3 @@ +pub mod error; pub mod syntax; pub mod util; diff --git a/src/syntax.rs b/src/syntax.rs index 0fe4cd0..74c25da 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -2,111 +2,44 @@ use lalrpop_util::lalrpop_mod; mod token_stream; mod tokens; -lalrpop_mod!(parser, "/syntax/parser.rs"); +lalrpop_mod!( + #[allow(clippy::just_underscores_and_digits)] + parser, + "/syntax/parser.rs" +); mod ast; pub use crate::syntax::ast::*; use crate::syntax::parser::ProgramParser; -use crate::syntax::token_stream::{LexerError, Location, TokenStream}; -use crate::syntax::tokens::Token; -#[cfg(test)] -use crate::util::istring::InternedString; +use crate::syntax::token_stream::TokenStream; +pub use crate::syntax::token_stream::{LexerError, Location}; +pub use crate::syntax::tokens::Token; use lalrpop_util::ParseError; -use std::fmt;
-use std::fs; -use std::io; +#[cfg(test)] use std::str::FromStr; -use thiserror::Error; -#[derive(Debug, Error)] -pub enum ParserError { - IOError(io::Error), - ParseError(ParseError), -} - -impl fmt::Display for ParserError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - ParserError::IOError(e) => write!(f, "IO error: {}", e), - - ParserError::ParseError(ParseError::ExtraToken{ token: (loc, tok, _)}) => { - write!(f, "{}: Unexpected additional token ({}) found at end of file", loc, tok) - } - ParserError::ParseError(ParseError::InvalidToken { location }) => { - write!(f, "{}: Unexpected token encountered", location) - } - ParserError::ParseError(ParseError::UnrecognizedEOF { location, expected }) => { - write!(f, "{}: Unexpected EOF{}", location, display_expected(expected)) - } - ParserError::ParseError(ParseError::UnrecognizedToken { token: (location, tok, _), expected }) => { - write!(f, "{}: Unexpected token {}{}", location, tok, display_expected(expected)) - } - ParserError::ParseError(ParseError::User{ error }) => { - write!(f, "{}: Couldn't process input (lexer error)", error.location) - } - } - } -} - -fn display_expected(expected: &Vec) -> String { - match expected.len() { - 0 => "".to_string(), - 1 => format!("; expected {}", expected[0]), - 2 => format!("; expected {} or {}", expected[0], expected[1]), - n => format!("; expected {}or {}", comma_separate(&expected[0..n-1]), expected[n-1]) - } -} - -fn comma_separate(strings: &[String]) -> String { - let mut result = String::new(); - - for s in strings.iter() { - result.push_str(&s); - result.push_str(", "); - } - - result -} - -impl From for ParserError { - fn from(x: io::Error) -> Self { - ParserError::IOError(x) - } -} - -impl From> for ParserError { - fn from(x: ParseError) -> Self { - ParserError::ParseError(x) - } -} +type ParserError = ParseError; impl Program { - pub fn from_file(filename: &str) -> Result { - let metadata = fs::metadata(filename)?; - let mut buffer = 
String::with_capacity(metadata.len() as usize); - let lexer = TokenStream::from_file(filename, &mut buffer)?; - Ok(ProgramParser::new().parse(lexer)?) - } - - fn parse(filename: &str, buffer: &mut String) -> Result { - let lexer = TokenStream::new(filename, buffer); - Ok(ProgramParser::new().parse(lexer)?) + pub fn parse(file_idx: usize, buffer: &str) -> Result { + let lexer = TokenStream::new(file_idx, buffer); + ProgramParser::new().parse(lexer) } } +#[cfg(test)] impl FromStr for Program { type Err = ParserError; fn from_str(s: &str) -> Result { - let mut s2 = s.to_string(); - Program::parse("", &mut s2) + Program::parse(0, s) } } #[test] fn order_of_operations() { let muladd1 = "1 + 2 * 3"; - let testfile = InternedString::new(""); + let testfile = 0; assert_eq!( Program::from_str(muladd1).unwrap(), Program { diff --git a/src/syntax/token_stream.rs b/src/syntax/token_stream.rs index b394477..eb393ee 100644 --- a/src/syntax/token_stream.rs +++ b/src/syntax/token_stream.rs @@ -1,34 +1,25 @@ use crate::syntax::tokens::Token; -use crate::util::istring::InternedString; use logos::{Logos, SpannedIter}; use std::fmt; -use std::fs::File; -use std::io; -use std::io::Read; +use thiserror::Error; pub struct TokenStream<'s> { - filename: InternedString, + file_idx: usize, lexer: SpannedIter<'s, Token>, } impl<'s> TokenStream<'s> { - pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> { + pub fn new(file_idx: usize, s: &'s str) -> TokenStream<'s> { TokenStream { - filename: InternedString::new(filename), + file_idx, lexer: Token::lexer(s).spanned(), } } - - pub fn from_file(filename: &str, buffer: &'s mut String) -> io::Result> { - let mut file = File::open(filename)?; - file.read_to_string(buffer)?; - Ok(TokenStream::new(filename, buffer)) - } } #[derive(Clone, Debug, PartialEq)] pub enum Location { - InFile(InternedString, usize), + InFile(usize, usize), Manufactured, } @@ -42,8 +33,8 @@ impl fmt::Display for Location { } impl Location { - fn new(filename: 
InternedString, offset: usize) -> Location { - Location::InFile(filename, offset) + fn new(file_idx: usize, offset: usize) -> Location { + Location::InFile(file_idx, offset) } } @@ -53,14 +44,15 @@ impl Default for Location { } } -#[derive(Debug, PartialEq)] -pub struct LexerError { - pub location: Location, +#[derive(Debug, Error, PartialEq)] +pub enum LexerError { + #[error("Failed lexing at {0}")] + LexFailure(Location), } impl LexerError { - fn new(filename: InternedString, offset: usize) -> LexerError { - LexerError { location: Location::new(filename, offset) } + fn new(file_idx: usize, offset: usize) -> LexerError { + LexerError::LexFailure(Location::new(file_idx, offset)) } } @@ -72,10 +64,10 @@ impl<'s> Iterator for TokenStream<'s> { fn next(&mut self) -> Option { match self.lexer.next() { None => None, - Some((Token::Error, span)) => Some(Err(LexerError::new(self.filename, span.start))), + Some((Token::Error, span)) => Some(Err(LexerError::new(self.file_idx, span.start))), Some((token, span)) => { - let start = Location::new(self.filename, span.start); - let end = Location::new(self.filename, span.end); + let start = Location::new(self.file_idx, span.start); + let end = Location::new(self.file_idx, span.end); Some(Ok((start, token, end))) } } @@ -84,46 +76,46 @@ impl<'s> Iterator for TokenStream<'s> { #[test] fn stream_works() { - let fname = InternedString::new(""); - let mut lex0 = TokenStream::new("", "y = x + 1//foo"); + let fidx = 42; + let mut lex0 = TokenStream::new(42, "y = x + 1//foo"); assert_eq!( lex0.next(), Some(Ok(( - Location::new(fname, 0), + Location::new(fidx, 0), Token::var("y"), - Location::new(fname, 1) + Location::new(fidx, 1) ))) ); assert_eq!( lex0.next(), Some(Ok(( - Location::new(fname, 2), + Location::new(fidx, 2), Token::Equals, - Location::new(fname, 3) + Location::new(fidx, 3) ))) ); assert_eq!( lex0.next(), Some(Ok(( - Location::new(fname, 4), + Location::new(fidx, 4), Token::var("x"), - Location::new(fname, 5) + 
Location::new(fidx, 5) ))) ); assert_eq!( lex0.next(), Some(Ok(( - Location::new(fname, 6), + Location::new(fidx, 6), Token::Operator('+'), - Location::new(fname, 7) + Location::new(fidx, 7) ))) ); assert_eq!( lex0.next(), Some(Ok(( - Location::new(fname, 8), + Location::new(fidx, 8), Token::Number((None, 1)), - Location::new(fname, 9) + Location::new(fidx, 9) ))) ); assert_eq!(lex0.next(), None); @@ -131,7 +123,7 @@ fn stream_works() { #[test] fn errors_work() { - let fname = InternedString::new(""); - let mut lex0 = TokenStream::new("", "\u{2639}"); - assert_eq!(lex0.next(), Some(Err(LexerError::new(fname, 0)))); + let fidx = 2; + let mut lex0 = TokenStream::new(2, "\u{2639}"); + assert_eq!(lex0.next(), Some(Err(LexerError::new(fidx, 0)))); } diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 0f4790b..9220b85 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -41,7 +41,9 @@ impl fmt::Display for Token { Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v), Token::Number((Some(10), v)) => write!(f, "'{}'", v), Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v), - Token::Number((Some(b), v)) => write!(f, "Invalidly-based-number", b, v), + Token::Number((Some(b), v)) => { + write!(f, "Invalidly-based-number", b, v) + } Token::Variable(s) => write!(f, "'{}'", s), Token::Error => write!(f, ""), } @@ -64,7 +66,6 @@ fn parse_number( Some(radix) => (radix, &value.slice()[2..]), }; - println!("HERE! (radix {}, slice |{}|", radix, strval); let intval = i128::from_str_radix(strval, radix as u32)?; Ok((base, intval)) }