//! NGR Parsing: Reading input, turning it into sense (or errors).
//!
//! This module implements the front end of the compiler, which is responsible for
//! reading in NGR syntax as a string, turning it into a series of reasonable Rust
//! structures for us to manipulate, and doing some validation while it's at it.
//!
//! The core flow for this work is:
//!
//! * Turning the string into a series of language-specific [`Token`]s.
//! * Taking those tokens, and computing a basic syntax tree from them,
//!   using our [`parser`].
//! * Validating the tree we have parsed, using the [`validate`] module,
//!   returning any warnings or errors we have found.
//! * Simplifying the tree we have parsed, using the [`simplify`] module,
//!   into something that's more easily turned into our [compiler internal
//!   representation](super::ir).
//!
//! In addition to all of this, we make sure that the structures defined in this
//! module are all:
//!
//! * Instances of [`Pretty`](::pretty::Pretty), so that you can print stuff back
//!   out that can be read by a human.
//! * Instances of [`Arbitrary`](proptest::prelude::Arbitrary), so they can be
//!   used in `proptest`-based property testing. There are built-in tests in
//!   the library, for example, to make sure that the pretty-printing round-trips.
//! * Able to be evaluated using an `eval` function, for comparison with later
//!   versions of the function downstream.
use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles}; use lalrpop_util::lalrpop_mod; use logos::Logos; mod arbitrary; mod ast; mod eval; mod location; pub mod simplify; mod tokens; lalrpop_mod!( #[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)] pub parser, "/syntax/parser.rs" ); mod pretty; pub mod validate; pub use crate::syntax::ast::*; pub use crate::syntax::location::Location; use crate::syntax::parser::ProgramParser; pub use crate::syntax::tokens::{LexerError, Token}; #[cfg(test)] use ::pretty::{Arena, Pretty}; use lalrpop_util::ParseError; #[cfg(test)] use proptest::{prop_assert, prop_assert_eq}; #[cfg(test)] use std::str::FromStr; use thiserror::Error; use self::parser::StatementParser; #[derive(Debug, Error)] pub enum ParserError { #[error("Invalid token")] InvalidToken(Location), #[error("Unrecognized EOF")] UnrecognizedEOF(Location, Vec), #[error("Unrecognized token")] UnrecognizedToken(Location, Location, Token, Vec), #[error("Extra token")] ExtraToken(Location, Token, Location), #[error("Lexing failure")] LexFailure(Location), #[error("File database error")] FileDatabaseError(#[from] codespan_reporting::files::Error), #[error("Read error")] ReadError(#[from] std::io::Error), } impl ParserError { fn convert(file_idx: usize, err: ParseError) -> Self { match err { ParseError::InvalidToken { location } => { ParserError::InvalidToken(Location::new(file_idx, location)) } ParseError::UnrecognizedEOF { location, expected } => { ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected) } ParseError::UnrecognizedToken { token: (start, token, end), expected, } => ParserError::UnrecognizedToken( Location::new(file_idx, start), Location::new(file_idx, end), token, expected, ), ParseError::ExtraToken { token: (start, token, end), } => ParserError::ExtraToken( Location::new(file_idx, start), token, Location::new(file_idx, end), ), ParseError::User { error } => match error { LexerError::LexFailure(offset) => { 
ParserError::LexFailure(Location::new(file_idx, offset)) } }, } } } fn display_expected(expected: &[String]) -> String { match expected.len() { 0 => "".to_string(), 1 => format!("; expected {}", expected[0]), 2 => format!("; expected {} or {}", expected[0], expected[1]), n => format!( "; expected {}or {}", comma_separate(&expected[0..n - 1]), expected[n - 1] ), } } fn comma_separate(strings: &[String]) -> String { let mut result = String::new(); for s in strings.iter() { result.push_str(s); result.push_str(", "); } result } impl<'a> From<&'a ParserError> for Diagnostic { fn from(value: &ParserError) -> Self { match value { // this was just a token we didn't understand ParserError::InvalidToken(location) => location .labelled_error("extremely odd token") .with_message("encountered extremely confusing token"), // unexpected EOF! ParserError::UnrecognizedEOF(location, expected) => location.error().with_message( format!("expected enf of file{}", display_expected(expected)), ), // encountered a token where it shouldn't be ParserError::UnrecognizedToken(start, end, token, expected) => { let expected_str = format!("unexpected token {}{}", token, display_expected(expected)); let unexpected_str = format!("unexpected token {}", token); let mut labels = start.range_label(end); Diagnostic::error() .with_labels( labels .drain(..) .map(|l| l.with_message(unexpected_str.clone())) .collect(), ) .with_message(expected_str) } // I think we get this when we get a token, but were expected EOF ParserError::ExtraToken(start, token, end) => { let expected_str = format!("unexpected token {} after the expected end of file", token); let unexpected_str = format!("unexpected token {}", token); let mut labels = start.range_label(end); Diagnostic::error() .with_labels( labels .drain(..) 
.map(|l| l.with_message(unexpected_str.clone())) .collect(), ) .with_message(expected_str) } // simple lexer errors ParserError::LexFailure(location) => { location.error().with_message("unexpected character") } ParserError::FileDatabaseError(e) => Diagnostic::error().with_message(e.to_string()), ParserError::ReadError(e) => Diagnostic::error().with_message(e.to_string()), } } } impl Program { pub fn parse_file( file_database: &mut SimpleFiles, file_name: &str, ) -> Result { let file_contents = std::fs::read_to_string(file_name)?; let file_handle = file_database.add(file_name.to_string(), file_contents); let file_db_info = file_database.get(file_handle)?; Program::parse(file_handle, file_db_info.source()) } pub fn parse(file_idx: usize, buffer: &str) -> Result { let lexer = Token::lexer(buffer) .spanned() .map(|(token, range)| (range.start, token, range.end)); ProgramParser::new() .parse(file_idx, lexer) .map_err(|e| ParserError::convert(file_idx, e)) } } impl Statement { pub fn parse(file_idx: usize, buffer: &str) -> Result { let lexer = Token::lexer(buffer) .spanned() .map(|(token, range)| (range.start, token, range.end)); StatementParser::new() .parse(file_idx, lexer) .map_err(|e| ParserError::convert(file_idx, e)) } } #[cfg(test)] impl FromStr for Program { type Err = ParserError; fn from_str(s: &str) -> Result { Program::parse(0, s) } } #[test] fn order_of_operations() { let muladd1 = "x = 1 + 2 * 3;"; let testfile = 0; assert_eq!( Program::from_str(muladd1).unwrap(), Program { statements: vec![Statement::Binding( Location::new(testfile, 0), "x".to_string(), Expression::Primitive( Location::new(testfile, 6), "+".to_string(), vec![ Expression::Value(Location::new(testfile, 4), Value::Number(None, 1)), Expression::Primitive( Location::new(testfile, 10), "*".to_string(), vec![ Expression::Value( Location::new(testfile, 8), Value::Number(None, 2), ), Expression::Value( Location::new(testfile, 12), Value::Number(None, 3), ), ] ) ] ) ),], } ); } proptest::proptest! 
{ #[test] fn random_render_parses_equal(program: Program) { let mut file_database = SimpleFiles::new(); let writer = ::pretty::termcolor::StandardStream::stderr(::pretty::termcolor::ColorChoice::Auto); let config = codespan_reporting::term::Config::default(); let allocator = Arena::<()>::new(); let mut out_vector = vec![]; prop_assert!(program.pretty(&allocator).render(80, &mut out_vector).is_ok()); let string = std::str::from_utf8(&out_vector).expect("emitted valid string"); let file_handle = file_database.add("test", string); let file_db_info = file_database.get(file_handle).expect("find thing just inserted"); let parsed = Program::parse(file_handle, file_db_info.source()); if let Err(e) = &parsed { eprintln!("failed to parse:\n{}", string); codespan_reporting::term::emit(&mut writer.lock(), &config, &file_database, &e.into()).unwrap(); } prop_assert_eq!(program, parsed.unwrap()); } #[test] fn random_syntaxes_validate(program: Program) { let (errors, _) = program.validate(); prop_assert!(errors.is_empty()); } #[test] fn generated_run_or_overflow(program: Program) { use crate::eval::{EvalError, PrimOpError}; assert!(matches!(program.eval(), Ok(_) | Err(EvalError::PrimOp(PrimOpError::MathFailure(_))))) } }