Files
ngr/src/syntax.rs

307 lines
11 KiB
Rust

//! NGR Parsing: Reading input, turning it into sense (or errors).
//!
//! This module implements the front end of the compiler, which is responsible for
//! reading in NGR syntax as a string, turning it into a series of reasonable Rust
//! structures for us to manipulate, and doing some validation while it's at it.
//!
//! The core flow for this work is:
//!
//! * Turning the string into a series of language-specific [`Token`]s.
//! * Taking those tokens, and computing a basic syntax tree from them,
//! using our [`parser`].
//! * Validating the tree we have parsed, using the [`validate`] module,
//! returning any warnings or errors we have found.
//! * Simplifying the tree we have parsed, using the [`simplify`] module,
//! into something that's more easily turned into our [compiler internal
//! representation](super::ir).
//!
//! In addition to all of this, we make sure that the structures defined in this
//! module are all:
//!
//! * Instances of [`Pretty`](::pretty::Pretty), so that you can print stuff back
//! out that can be read by a human.
//! * Instances of [`Arbitrary`](proptest::prelude::Arbitrary), so they can be
//! used in `proptest`-based property testing. There are built-in tests in
//! the library, for example, to make sure that the pretty-printing round-trips.
//! * Equipped with an `eval` function, for comparison with later
//! versions of the function downstream.
use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles};
use lalrpop_util::lalrpop_mod;
use logos::Logos;
mod arbitrary;
mod ast;
mod eval;
mod location;
pub mod simplify;
mod tokens;
lalrpop_mod!(
#[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
pub parser,
"/syntax/parser.rs"
);
mod pretty;
pub mod validate;
pub use crate::syntax::ast::*;
pub use crate::syntax::location::Location;
use crate::syntax::parser::ProgramParser;
pub use crate::syntax::tokens::{LexerError, Token};
#[cfg(test)]
use ::pretty::{Arena, Pretty};
use lalrpop_util::ParseError;
#[cfg(test)]
use proptest::{prop_assert, prop_assert_eq};
#[cfg(test)]
use std::str::FromStr;
use thiserror::Error;
use self::parser::StatementParser;
/// Everything that can go wrong while turning source text into a syntax
/// tree: lexing and parsing failures, plus the file-database and I/O
/// plumbing around them.
#[derive(Debug, Error)]
pub enum ParserError {
    /// A token the parser could not make sense of at this location.
    #[error("Invalid token")]
    InvalidToken(Location),
    /// Input ended early; the `Vec<String>` lists what was expected instead.
    #[error("Unrecognized EOF")]
    UnrecognizedEOF(Location, Vec<String>),
    /// A token (spanning the two locations) appeared where it is not
    /// allowed; the `Vec<String>` lists the expected alternatives.
    #[error("Unrecognized token")]
    UnrecognizedToken(Location, Location, Token, Vec<String>),
    /// A token appeared after the point where the input was expected to end.
    #[error("Extra token")]
    ExtraToken(Location, Token, Location),
    /// The lexer could not recognize the input at this location.
    #[error("Lexing failure")]
    LexFailure(Location),
    /// Looking up a file in the `codespan` file database failed.
    #[error("File database error")]
    FileDatabaseError(#[from] codespan_reporting::files::Error),
    /// Reading the source file from disk failed.
    #[error("Read error")]
    ReadError(#[from] std::io::Error),
}
impl ParserError {
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
match err {
ParseError::InvalidToken { location } => {
ParserError::InvalidToken(Location::new(file_idx, location))
}
ParseError::UnrecognizedEOF { location, expected } => {
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
}
ParseError::UnrecognizedToken {
token: (start, token, end),
expected,
} => ParserError::UnrecognizedToken(
Location::new(file_idx, start),
Location::new(file_idx, end),
token,
expected,
),
ParseError::ExtraToken {
token: (start, token, end),
} => ParserError::ExtraToken(
Location::new(file_idx, start),
token,
Location::new(file_idx, end),
),
ParseError::User { error } => match error {
LexerError::LexFailure(offset) => {
ParserError::LexFailure(Location::new(file_idx, offset))
}
},
}
}
}
/// Render the parser's "expected …" suggestion list as a message suffix.
///
/// An empty list yields an empty string; longer lists read naturally with an
/// Oxford comma, e.g. `"; expected A, B, or C"`.
fn display_expected(expected: &[String]) -> String {
    match expected {
        [] => String::new(),
        [only] => format!("; expected {}", only),
        [first, second] => format!("; expected {} or {}", first, second),
        // Three or more: `comma_separate` leaves a trailing ", ", so the
        // final "or" completes the list as "a, b, or c".
        [head @ .., last] => format!("; expected {}or {}", comma_separate(head), last),
    }
}

/// Concatenate each string followed by ", " — note the trailing separator,
/// which `display_expected` relies on.
fn comma_separate(strings: &[String]) -> String {
    strings.iter().map(|s| format!("{}, ", s)).collect()
}
impl<'a> From<&'a ParserError> for Diagnostic<usize> {
    /// Convert a [`ParserError`] into a `codespan-reporting` diagnostic so
    /// it can be rendered against the original source for the user.
    fn from(value: &ParserError) -> Self {
        match value {
            // this was just a token we didn't understand
            ParserError::InvalidToken(location) => location
                .labelled_error("extremely odd token")
                .with_message("encountered extremely confusing token"),
            // input ended while the grammar still expected more tokens
            // (message previously read "expected enf of file" — typo fixed,
            // and the EOF is the *unexpected* thing here)
            ParserError::UnrecognizedEOF(location, expected) => location.error().with_message(
                format!("unexpected end of file{}", display_expected(expected)),
            ),
            // encountered a token where it shouldn't be
            ParserError::UnrecognizedToken(start, end, token, expected) => {
                let message = format!("unexpected token {}{}", token, display_expected(expected));
                let label_text = format!("unexpected token {}", token);
                Diagnostic::error()
                    .with_labels(
                        start
                            .range_label(end)
                            .into_iter()
                            .map(|l| l.with_message(label_text.clone()))
                            .collect(),
                    )
                    .with_message(message)
            }
            // a token arrived after the point where the grammar expected EOF
            ParserError::ExtraToken(start, token, end) => {
                let message =
                    format!("unexpected token {} after the expected end of file", token);
                let label_text = format!("unexpected token {}", token);
                Diagnostic::error()
                    .with_labels(
                        start
                            .range_label(end)
                            .into_iter()
                            .map(|l| l.with_message(label_text.clone()))
                            .collect(),
                    )
                    .with_message(message)
            }
            // simple lexer errors
            ParserError::LexFailure(location) => {
                location.error().with_message("unexpected character")
            }
            // infrastructure failures: just surface the underlying message
            ParserError::FileDatabaseError(e) => Diagnostic::error().with_message(e.to_string()),
            ParserError::ReadError(e) => Diagnostic::error().with_message(e.to_string()),
        }
    }
}
impl Program {
    /// Read `file_name` from disk, register its contents with
    /// `file_database`, and parse them into a [`Program`].
    ///
    /// Registering first means later diagnostics can point back into the
    /// stored source via the returned file handle.
    pub fn parse_file(
        file_database: &mut SimpleFiles<String, String>,
        file_name: &str,
    ) -> Result<Self, ParserError> {
        let contents = std::fs::read_to_string(file_name)?;
        let handle = file_database.add(file_name.to_string(), contents);
        let source = file_database.get(handle)?.source();
        Program::parse(handle, source)
    }

    /// Parse `buffer` as a whole program; `file_idx` tags every location in
    /// the resulting tree so diagnostics reference the right file.
    pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
        // lalrpop wants (start, token, end) triples rather than logos spans
        let spanned_tokens = Token::lexer(buffer)
            .spanned()
            .map(|(token, span)| (span.start, token, span.end));
        ProgramParser::new()
            .parse(file_idx, spanned_tokens)
            .map_err(|err| ParserError::convert(file_idx, err))
    }
}
impl Statement {
    /// Parse `buffer` as a single statement; `file_idx` tags every location
    /// in the resulting tree so diagnostics reference the right file.
    pub fn parse(file_idx: usize, buffer: &str) -> Result<Statement, ParserError> {
        // lalrpop wants (start, token, end) triples rather than logos spans
        let spanned_tokens = Token::lexer(buffer)
            .spanned()
            .map(|(token, span)| (span.start, token, span.end));
        StatementParser::new()
            .parse(file_idx, spanned_tokens)
            .map_err(|err| ParserError::convert(file_idx, err))
    }
}
// Test-only convenience so tests can write `"…".parse::<Program>()` /
// `Program::from_str(…)`; uses a fixed dummy file index of 0.
#[cfg(test)]
impl FromStr for Program {
    type Err = ParserError;

    fn from_str(s: &str) -> Result<Program, ParserError> {
        Program::parse(0, s)
    }
}
// Checks operator precedence: in `1 + 2 * 3`, the `*` must bind tighter
// than `+`, so the multiplication node ends up nested inside the addition.
#[test]
fn order_of_operations() {
    let muladd1 = "x = 1 + 2 * 3;";
    // all locations in this test refer to file index 0 (the dummy file)
    let testfile = 0;
    assert_eq!(
        Program::from_str(muladd1).unwrap(),
        Program {
            statements: vec![Statement::Binding(
                Location::new(testfile, 0),
                "x".to_string(),
                // the `+` at byte offset 6 is the root of the expression…
                Expression::Primitive(
                    Location::new(testfile, 6),
                    "+".to_string(),
                    vec![
                        Expression::Value(Location::new(testfile, 4), Value::Number(None, 1)),
                        // …with the `*` at offset 10 nested beneath it
                        Expression::Primitive(
                            Location::new(testfile, 10),
                            "*".to_string(),
                            vec![
                                Expression::Value(
                                    Location::new(testfile, 8),
                                    Value::Number(None, 2),
                                ),
                                Expression::Value(
                                    Location::new(testfile, 12),
                                    Value::Number(None, 3),
                                ),
                            ]
                        )
                    ]
                )
            ),],
        }
    );
}
proptest::proptest! {
    // Round-trip property: pretty-printing an arbitrary program and
    // re-parsing the rendered text must yield an equal program.
    #[test]
    fn random_render_parses_equal(program: Program) {
        let mut file_database = SimpleFiles::new();
        // terminal reporting setup so a failure prints a readable diagnostic
        let writer = ::pretty::termcolor::StandardStream::stderr(::pretty::termcolor::ColorChoice::Auto);
        let config = codespan_reporting::term::Config::default();
        let allocator = Arena::<()>::new();
        let mut out_vector = vec![];
        // render at width 80 into an in-memory buffer
        prop_assert!(program.pretty(&allocator).render(80, &mut out_vector).is_ok());
        let string = std::str::from_utf8(&out_vector).expect("emitted valid string");
        let file_handle = file_database.add("test", string);
        let file_db_info = file_database.get(file_handle).expect("find thing just inserted");
        let parsed = Program::parse(file_handle, file_db_info.source());
        if let Err(e) = &parsed {
            // on failure, show both the rendered source and the parse diagnostic
            eprintln!("failed to parse:\n{}", string);
            codespan_reporting::term::emit(&mut writer.lock(), &config, &file_database, &e.into()).unwrap();
        }
        prop_assert_eq!(program, parsed.unwrap());
    }
    // Arbitrary generated programs must pass validation with zero errors.
    #[test]
    fn random_syntaxes_validate(program: Program) {
        let (errors, _) = program.validate();
        prop_assert!(errors.is_empty());
    }
    // Evaluating an arbitrary program either succeeds or fails only with a
    // math/overflow-style primitive-op error — never any other error kind.
    #[test]
    fn generated_run_or_overflow(program: Program) {
        use crate::eval::{EvalError, PrimOpError};
        assert!(matches!(program.eval(), Ok(_) | Err(EvalError::PrimOp(PrimOpError::MathFailure(_)))))
    }
}