Use codespan for *much* prettier error reporting.

This commit is contained in:
2022-02-16 20:46:13 -08:00
parent 60e7d9a41d
commit 0293eee2d0
7 changed files with 222 additions and 137 deletions

View File

@@ -2,7 +2,7 @@
name = "ngr" name = "ngr"
version = "0.1.0" version = "0.1.0"
authors = ["awick"] authors = ["awick"]
edition = "2018" edition = "2021"
[lib] [lib]
name = "ngr" name = "ngr"
@@ -14,6 +14,8 @@ path = "src/bin.rs"
[dependencies] [dependencies]
clap = { version = "^3.0.14", features = ["derive"] } clap = { version = "^3.0.14", features = ["derive"] }
codespan = "0.11.1"
codespan-reporting = "0.11.1"
lalrpop-util = "^0.19.7" lalrpop-util = "^0.19.7"
lazy_static = "^1.4.0" lazy_static = "^1.4.0"
logos = "^0.12.0" logos = "^0.12.0"

View File

@@ -1,5 +1,11 @@
use clap::Parser; use clap::Parser;
use ngr::syntax::{ParserError, Program}; use codespan_reporting::diagnostic::Diagnostic;
use codespan_reporting::files::SimpleFiles;
use codespan_reporting::term;
use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};
use ngr::error::Error;
use ngr::syntax::Program;
use std::fs;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)] #[clap(author, version, about, long_about = None)]
@@ -9,21 +15,30 @@ struct CommandLineArguments {
output: Option<String>, output: Option<String>,
/// The file to parse /// The file to parse
file: String file: String,
} }
fn real_main() -> Result<(), ParserError> { fn compile_file(
let args = CommandLineArguments::parse(); file_database: &mut SimpleFiles<String, String>,
initial_file_name: &str,
let program = Program::from_file(&args.file)?; ) -> Result<Program, Error> {
println!("args: {:?}", args); let initial_file_contents = fs::read_to_string(initial_file_name)?;
println!("program: {:?}", program); let initial_file = file_database.add(initial_file_name.to_string(), initial_file_contents);
let db_version = file_database.get(initial_file)?;
Ok(()) let db_version_source = db_version.source();
Ok(Program::parse(initial_file, db_version_source)?)
} }
fn main() { fn main() {
if let Err(e) = real_main() { let args = CommandLineArguments::parse();
println!("{}", e); let mut file_database = SimpleFiles::new();
let initial_file_name = &args.file;
if let Err(e) = compile_file(&mut file_database, initial_file_name) {
let diagnostic = Diagnostic::from(e);
let writer = StandardStream::stderr(ColorChoice::Auto);
let config = codespan_reporting::term::Config::default();
term::emit(&mut writer.lock(), &config, &file_database, &diagnostic).unwrap();
} }
} }

141
src/error.rs Normal file
View File

@@ -0,0 +1,141 @@
use crate::syntax::{LexerError, Location, Token};
use codespan_reporting::diagnostic::{Diagnostic, Label};
use codespan_reporting::files;
use lalrpop_util::ParseError;
use std::io;
use thiserror::Error;
/// Top-level error type for the compiler driver.
///
/// Wraps the three failure sources visible in this crate — reading the
/// source file, looking a file up in codespan's database, and lalrpop
/// parse failures — so callers can use `?` across all of them via the
/// `#[from]` conversions.
#[derive(Debug, Error)]
pub enum Error {
    // Failure reading the input file from disk.
    #[error("IO failure: {0}")]
    IOError(#[from] io::Error),
    // A lookup in codespan_reporting's file database failed
    // (e.g. a stale/invalid file id).
    #[error("Internal file database error: {0}")]
    InternalFileDBError(#[from] files::Error),
    // The parser (or its lexer, via the User variant) rejected the input.
    #[error("Error in parser: {0}")]
    ParserError(#[from] ParseError<Location, Token, LexerError>),
}
/// Builds the primary labels for a token span bounded by `start` and `end`.
///
/// Manufactured locations carry no position, so: both manufactured → no
/// labels; only one in a file → a zero-width label at that offset; both in
/// the *same* file → a label spanning start..end. If the two ends somehow
/// sit in different files, we fall back to a zero-width label at `start`.
fn locations_to_labels(start: &Location, end: &Location) -> Vec<Label<usize>> {
    match (start, end) {
        (Location::Manufactured, Location::Manufactured) => vec![],
        (Location::Manufactured, Location::InFile(file_id, off)) => {
            vec![Label::primary(*file_id, *off..*off)]
        }
        (Location::InFile(file_a, lo), Location::InFile(file_b, hi)) if file_a == file_b => {
            vec![Label::primary(*file_a, *lo..*hi)]
        }
        (Location::InFile(file_a, lo), _) => vec![Label::primary(*file_a, *lo..*lo)],
    }
}
/// Renders lalrpop's "expected tokens" list as a message suffix,
/// e.g. `"; expected A, B, or C"`. An empty list yields `""`.
fn display_expected(expected: &[String]) -> String {
    match expected {
        [] => String::new(),
        [only] => format!("; expected {}", only),
        [first, second] => format!("; expected {} or {}", first, second),
        // comma_separate leaves a trailing ", ", which runs straight
        // into the "or" to produce "A, B, or C".
        [head @ .., last] => format!("; expected {}or {}", comma_separate(head), last),
    }
}

/// Joins strings with ", ", including a trailing ", " after the last item.
fn comma_separate(strings: &[String]) -> String {
    strings.iter().fold(String::new(), |mut acc, s| {
        acc.push_str(s);
        acc.push_str(", ");
        acc
    })
}
impl From<Error> for Diagnostic<usize> {
fn from(x: Error) -> Self {
match &x {
Error::IOError(e) => Diagnostic::error().with_message(format!("{}", e)),
Error::InternalFileDBError(e) => Diagnostic::error().with_message(format!("{}", e)),
Error::ParserError(pe) => match pe {
// this was just a token we didn't understand
ParseError::InvalidToken { location } => match location {
Location::Manufactured => Diagnostic::error().with_message(
"encountered extremely confusing token (in generated data?!)",
),
Location::InFile(file_id, off) => Diagnostic::error()
.with_message("encountered extremely confusing token")
.with_labels(vec![Label::primary(*file_id, *off..*off)
.with_message("extremely odd token")]),
},
// unexpected EOF!
ParseError::UnrecognizedEOF { location, expected } => match location {
Location::Manufactured => Diagnostic::error().with_message(format!(
"unexpected end of file{}",
display_expected(expected)
)),
Location::InFile(file_id, off) => Diagnostic::error()
.with_message(format!(
"unexpected enf of file{}",
display_expected(expected)
))
.with_labels(vec![Label::primary(*file_id, *off..*off)]),
},
// encountered a token where it shouldn't be
ParseError::UnrecognizedToken { token, expected } => {
let (start, token, end) = token;
let expected_str =
format!("unexpected token {}{}", token, display_expected(expected));
let unexpected_str = format!("unexpected token {}", token);
let mut labels = locations_to_labels(start, end);
Diagnostic::error()
.with_labels(
labels
.drain(..)
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
.with_message(expected_str)
}
// I think we get this when we get a token, but were expected EOF
ParseError::ExtraToken { token } => {
let (start, token, end) = token;
let expected_str =
format!("unexpected token {} after the expected end of file", token);
let unexpected_str = format!("unexpected token {}", token);
let mut labels = locations_to_labels(start, end);
Diagnostic::error()
.with_labels(
labels
.drain(..)
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
.with_message(expected_str)
}
// simple lexer errors
ParseError::User { error } => match error {
LexerError::LexFailure(location) => match location {
Location::Manufactured => Diagnostic::error()
.with_message("unexpected character encountered in manufactured code?"),
Location::InFile(file_id, offset) => Diagnostic::error()
.with_labels(vec![Label::primary(*file_id, *offset..*offset)
.with_message("unexpected character")]),
},
},
},
}
}
}

View File

@@ -1,2 +1,3 @@
pub mod error;
pub mod syntax; pub mod syntax;
pub mod util; pub mod util;

View File

@@ -2,111 +2,44 @@ use lalrpop_util::lalrpop_mod;
mod token_stream; mod token_stream;
mod tokens; mod tokens;
lalrpop_mod!(parser, "/syntax/parser.rs"); lalrpop_mod!(
#[allow(clippy::just_underscores_and_digits)]
parser,
"/syntax/parser.rs"
);
mod ast; mod ast;
pub use crate::syntax::ast::*; pub use crate::syntax::ast::*;
use crate::syntax::parser::ProgramParser; use crate::syntax::parser::ProgramParser;
use crate::syntax::token_stream::{LexerError, Location, TokenStream}; use crate::syntax::token_stream::TokenStream;
use crate::syntax::tokens::Token; pub use crate::syntax::token_stream::{LexerError, Location};
#[cfg(test)] pub use crate::syntax::tokens::Token;
use crate::util::istring::InternedString;
use lalrpop_util::ParseError; use lalrpop_util::ParseError;
use std::fmt; #[cfg(test)]
use std::fs;
use std::io;
use std::str::FromStr; use std::str::FromStr;
use thiserror::Error;
#[derive(Debug, Error)] type ParserError = ParseError<Location, Token, LexerError>;
pub enum ParserError {
IOError(io::Error),
ParseError(ParseError<Location, Token, LexerError>),
}
impl fmt::Display for ParserError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ParserError::IOError(e) => write!(f, "IO error: {}", e),
ParserError::ParseError(ParseError::ExtraToken{ token: (loc, tok, _)}) => {
write!(f, "{}: Unexpected additional token ({}) found at end of file", loc, tok)
}
ParserError::ParseError(ParseError::InvalidToken { location }) => {
write!(f, "{}: Unexpected token encountered", location)
}
ParserError::ParseError(ParseError::UnrecognizedEOF { location, expected }) => {
write!(f, "{}: Unexpected EOF{}", location, display_expected(expected))
}
ParserError::ParseError(ParseError::UnrecognizedToken { token: (location, tok, _), expected }) => {
write!(f, "{}: Unexpected token {}{}", location, tok, display_expected(expected))
}
ParserError::ParseError(ParseError::User{ error }) => {
write!(f, "{}: Couldn't process input (lexer error)", error.location)
}
}
}
}
fn display_expected(expected: &Vec<String>) -> String {
match expected.len() {
0 => "".to_string(),
1 => format!("; expected {}", expected[0]),
2 => format!("; expected {} or {}", expected[0], expected[1]),
n => format!("; expected {}or {}", comma_separate(&expected[0..n-1]), expected[n-1])
}
}
fn comma_separate(strings: &[String]) -> String {
let mut result = String::new();
for s in strings.iter() {
result.push_str(&s);
result.push_str(", ");
}
result
}
impl From<io::Error> for ParserError {
fn from(x: io::Error) -> Self {
ParserError::IOError(x)
}
}
impl From<ParseError<Location, Token, LexerError>> for ParserError {
fn from(x: ParseError<Location, Token, LexerError>) -> Self {
ParserError::ParseError(x)
}
}
impl Program { impl Program {
pub fn from_file(filename: &str) -> Result<Program, ParserError> { pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
let metadata = fs::metadata(filename)?; let lexer = TokenStream::new(file_idx, buffer);
let mut buffer = String::with_capacity(metadata.len() as usize); ProgramParser::new().parse(lexer)
let lexer = TokenStream::from_file(filename, &mut buffer)?;
Ok(ProgramParser::new().parse(lexer)?)
}
fn parse(filename: &str, buffer: &mut String) -> Result<Program, ParserError> {
let lexer = TokenStream::new(filename, buffer);
Ok(ProgramParser::new().parse(lexer)?)
} }
} }
#[cfg(test)]
impl FromStr for Program { impl FromStr for Program {
type Err = ParserError; type Err = ParserError;
fn from_str(s: &str) -> Result<Program, ParserError> { fn from_str(s: &str) -> Result<Program, ParserError> {
let mut s2 = s.to_string(); Program::parse(0, s)
Program::parse("<from_str>", &mut s2)
} }
} }
#[test] #[test]
fn order_of_operations() { fn order_of_operations() {
let muladd1 = "1 + 2 * 3"; let muladd1 = "1 + 2 * 3";
let testfile = InternedString::new("<from_str>"); let testfile = 0;
assert_eq!( assert_eq!(
Program::from_str(muladd1).unwrap(), Program::from_str(muladd1).unwrap(),
Program { Program {

View File

@@ -1,34 +1,25 @@
use crate::syntax::tokens::Token; use crate::syntax::tokens::Token;
use crate::util::istring::InternedString;
use logos::{Logos, SpannedIter}; use logos::{Logos, SpannedIter};
use std::fmt; use std::fmt;
use std::fs::File; use thiserror::Error;
use std::io;
use std::io::Read;
pub struct TokenStream<'s> { pub struct TokenStream<'s> {
filename: InternedString, file_idx: usize,
lexer: SpannedIter<'s, Token>, lexer: SpannedIter<'s, Token>,
} }
impl<'s> TokenStream<'s> { impl<'s> TokenStream<'s> {
pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> { pub fn new(file_idx: usize, s: &'s str) -> TokenStream<'s> {
TokenStream { TokenStream {
filename: InternedString::new(filename), file_idx,
lexer: Token::lexer(s).spanned(), lexer: Token::lexer(s).spanned(),
} }
} }
pub fn from_file(filename: &str, buffer: &'s mut String) -> io::Result<TokenStream<'s>> {
let mut file = File::open(filename)?;
file.read_to_string(buffer)?;
Ok(TokenStream::new(filename, buffer))
}
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Location { pub enum Location {
InFile(InternedString, usize), InFile(usize, usize),
Manufactured, Manufactured,
} }
@@ -42,8 +33,8 @@ impl fmt::Display for Location {
} }
impl Location { impl Location {
fn new(filename: InternedString, offset: usize) -> Location { fn new(file_idx: usize, offset: usize) -> Location {
Location::InFile(filename, offset) Location::InFile(file_idx, offset)
} }
} }
@@ -53,14 +44,15 @@ impl Default for Location {
} }
} }
#[derive(Debug, PartialEq)] #[derive(Debug, Error, PartialEq)]
pub struct LexerError { pub enum LexerError {
pub location: Location, #[error("Failed lexing at {0}")]
LexFailure(Location),
} }
impl LexerError { impl LexerError {
fn new(filename: InternedString, offset: usize) -> LexerError { fn new(file_idx: usize, offset: usize) -> LexerError {
LexerError { location: Location::new(filename, offset) } LexerError::LexFailure(Location::new(file_idx, offset))
} }
} }
@@ -72,10 +64,10 @@ impl<'s> Iterator for TokenStream<'s> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.lexer.next() { match self.lexer.next() {
None => None, None => None,
Some((Token::Error, span)) => Some(Err(LexerError::new(self.filename, span.start))), Some((Token::Error, span)) => Some(Err(LexerError::new(self.file_idx, span.start))),
Some((token, span)) => { Some((token, span)) => {
let start = Location::new(self.filename, span.start); let start = Location::new(self.file_idx, span.start);
let end = Location::new(self.filename, span.end); let end = Location::new(self.file_idx, span.end);
Some(Ok((start, token, end))) Some(Ok((start, token, end)))
} }
} }
@@ -84,46 +76,46 @@ impl<'s> Iterator for TokenStream<'s> {
#[test] #[test]
fn stream_works() { fn stream_works() {
let fname = InternedString::new("<file>"); let fidx = 42;
let mut lex0 = TokenStream::new("<file>", "y = x + 1//foo"); let mut lex0 = TokenStream::new(42, "y = x + 1//foo");
assert_eq!( assert_eq!(
lex0.next(), lex0.next(),
Some(Ok(( Some(Ok((
Location::new(fname, 0), Location::new(fidx, 0),
Token::var("y"), Token::var("y"),
Location::new(fname, 1) Location::new(fidx, 1)
))) )))
); );
assert_eq!( assert_eq!(
lex0.next(), lex0.next(),
Some(Ok(( Some(Ok((
Location::new(fname, 2), Location::new(fidx, 2),
Token::Equals, Token::Equals,
Location::new(fname, 3) Location::new(fidx, 3)
))) )))
); );
assert_eq!( assert_eq!(
lex0.next(), lex0.next(),
Some(Ok(( Some(Ok((
Location::new(fname, 4), Location::new(fidx, 4),
Token::var("x"), Token::var("x"),
Location::new(fname, 5) Location::new(fidx, 5)
))) )))
); );
assert_eq!( assert_eq!(
lex0.next(), lex0.next(),
Some(Ok(( Some(Ok((
Location::new(fname, 6), Location::new(fidx, 6),
Token::Operator('+'), Token::Operator('+'),
Location::new(fname, 7) Location::new(fidx, 7)
))) )))
); );
assert_eq!( assert_eq!(
lex0.next(), lex0.next(),
Some(Ok(( Some(Ok((
Location::new(fname, 8), Location::new(fidx, 8),
Token::Number((None, 1)), Token::Number((None, 1)),
Location::new(fname, 9) Location::new(fidx, 9)
))) )))
); );
assert_eq!(lex0.next(), None); assert_eq!(lex0.next(), None);
@@ -131,7 +123,7 @@ fn stream_works() {
#[test] #[test]
fn errors_work() { fn errors_work() {
let fname = InternedString::new("<file>"); let fidx = 2;
let mut lex0 = TokenStream::new("<file>", "\u{2639}"); let mut lex0 = TokenStream::new(2, "\u{2639}");
assert_eq!(lex0.next(), Some(Err(LexerError::new(fname, 0)))); assert_eq!(lex0.next(), Some(Err(LexerError::new(fidx, 0))));
} }

View File

@@ -41,7 +41,9 @@ impl fmt::Display for Token {
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v), Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
Token::Number((Some(10), v)) => write!(f, "'{}'", v), Token::Number((Some(10), v)) => write!(f, "'{}'", v),
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v), Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
Token::Number((Some(b), v)) => write!(f, "Invalidly-based-number<base={},val={}>", b, v), Token::Number((Some(b), v)) => {
write!(f, "Invalidly-based-number<base={},val={}>", b, v)
}
Token::Variable(s) => write!(f, "'{}'", s), Token::Variable(s) => write!(f, "'{}'", s),
Token::Error => write!(f, "<error>"), Token::Error => write!(f, "<error>"),
} }
@@ -64,7 +66,6 @@ fn parse_number(
Some(radix) => (radix, &value.slice()[2..]), Some(radix) => (radix, &value.slice()[2..]),
}; };
println!("HERE! (radix {}, slice |{}|", radix, strval);
let intval = i128::from_str_radix(strval, radix as u32)?; let intval = i128::from_str_radix(strval, radix as u32)?;
Ok((base, intval)) Ok((base, intval))
} }