🤷 The initial version of the compiler, both static and JIT.

This implements a full compiler, with both static compilation and JIT
support, for the world's simplest and silliest programming language. You
can do math, and print variables. That's it. On the bright side, it
implements every part of the compiler: from the lexer and parser,
through analysis and simplification, and into a reasonable code
generator. This should be a good jumping-off point for adding more
advanced features.

Tests, including proptests, are included to help avoid regressions.
This commit is contained in:
2020-08-01 20:45:33 -07:00
commit b2f6b12ced
30 changed files with 2178 additions and 0 deletions

272
src/syntax.rs Normal file
View File

@@ -0,0 +1,272 @@
use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles};
use lalrpop_util::lalrpop_mod;
use logos::Logos;
mod arbitrary;
pub mod ast;
mod location;
mod simplify;
mod tokens;
lalrpop_mod!(
#[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
parser,
"/syntax/parser.rs"
);
mod pretty;
mod validate;
pub use crate::syntax::ast::*;
pub use crate::syntax::location::Location;
use crate::syntax::parser::ProgramParser;
pub use crate::syntax::tokens::{LexerError, Token};
#[cfg(test)]
use ::pretty::{Arena, Pretty};
use lalrpop_util::ParseError;
#[cfg(test)]
use proptest::{prop_assert, prop_assert_eq};
#[cfg(test)]
use std::str::FromStr;
use thiserror::Error;
use self::parser::StatementParser;
/// Everything that can go wrong while reading, lexing, or parsing source text.
///
/// The token-level variants mirror the cases of `lalrpop_util::ParseError`,
/// with the raw positions wrapped into `Location`s (see `ParserError::convert`).
#[derive(Debug, Error)]
pub enum ParserError {
/// The parser reported an invalid token at the given location.
#[error("Invalid token")]
InvalidToken(Location),
/// The parser reported an unrecognized end of input. Carries the location
/// it was reported at and the parser's list of what it expected there.
#[error("Unrecognized EOF")]
UnrecognizedEOF(Location, Vec<String>),
/// The parser reported a token it did not recognize in that position.
/// Carries the token's start and end locations, the token itself, and
/// the parser's list of what it expected instead.
#[error("Unrecognized token")]
UnrecognizedToken(Location, Location, Token, Vec<String>),
/// The parser reported an extra token. Carries the token's start
/// location, the token itself, and its end location (note the field order
/// differs from `UnrecognizedToken`).
#[error("Extra token")]
ExtraToken(Location, Token, Location),
/// The lexer failed at the given location.
#[error("Lexing failure")]
LexFailure(Location),
/// An error reported by the `codespan_reporting` file database.
#[error("File database error")]
FileDatabaseError(#[from] codespan_reporting::files::Error),
/// An I/O error, such as a failure to read a source file.
#[error("Read error")]
ReadError(#[from] std::io::Error),
}
impl ParserError {
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
match err {
ParseError::InvalidToken { location } => {
ParserError::InvalidToken(Location::new(file_idx, location))
}
ParseError::UnrecognizedEOF { location, expected } => {
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
}
ParseError::UnrecognizedToken {
token: (start, token, end),
expected,
} => ParserError::UnrecognizedToken(
Location::new(file_idx, start),
Location::new(file_idx, end),
token,
expected,
),
ParseError::ExtraToken {
token: (start, token, end),
} => ParserError::ExtraToken(
Location::new(file_idx, start),
token,
Location::new(file_idx, end),
),
ParseError::User { error } => match error {
LexerError::LexFailure(offset) => {
ParserError::LexFailure(Location::new(file_idx, offset))
}
},
}
}
}
/// Render the parser's list of acceptable inputs as a message suffix.
///
/// Returns an empty string when nothing was expected. Otherwise the result
/// starts with `"; expected "` and lists the alternatives, joining the final
/// one with `or` (for example `"; expected a, b, or c"`).
fn display_expected(expected: &[String]) -> String {
    match expected {
        [] => String::new(),
        [only] => format!("; expected {}", only),
        [first, second] => format!("; expected {} or {}", first, second),
        // `comma_separate` ends its output with ", ", which is why there is
        // no space before "or" in the format string.
        [leading @ .., last] => {
            format!("; expected {}or {}", comma_separate(leading), last)
        }
    }
}
/// Concatenate `strings`, following every element (including the last one)
/// with `", "`.
///
/// An empty slice yields an empty string.
fn comma_separate(strings: &[String]) -> String {
    strings.iter().fold(String::new(), |mut joined, item| {
        joined.push_str(item);
        joined.push_str(", ");
        joined
    })
}
/// Turn a `ParserError` into a `codespan_reporting` diagnostic that can be
/// shown to the user.
impl<'a> From<&'a ParserError> for Diagnostic<usize> {
    /// Build the diagnostic for `value`.
    ///
    /// Variants that carry locations produce diagnostics built from those
    /// locations; the file-database and I/O variants have no location and
    /// only carry the underlying error's message.
    fn from(value: &ParserError) -> Self {
        match value {
            // this was just a token we didn't understand
            ParserError::InvalidToken(location) => location
                .labelled_error("extremely odd token")
                .with_message("encountered extremely confusing token"),

            // the input ended while the parser still expected more
            ParserError::UnrecognizedEOF(location, expected) => location.error().with_message(
                format!("unexpected end of file{}", display_expected(expected)),
            ),

            // encountered a token where it shouldn't be
            ParserError::UnrecognizedToken(start, end, token, expected) => {
                // The headline also says what was expected; the labels only
                // name the offending token.
                let headline =
                    format!("unexpected token {}{}", token, display_expected(expected));
                let label_text = format!("unexpected token {}", token);
                let mut labels = start.range_label(end);

                Diagnostic::error()
                    .with_labels(
                        labels
                            .drain(..)
                            .map(|l| l.with_message(label_text.clone()))
                            .collect(),
                    )
                    .with_message(headline)
            }

            // we got a token, but were expecting the end of the file
            ParserError::ExtraToken(start, token, end) => {
                let headline =
                    format!("unexpected token {} after the expected end of file", token);
                let label_text = format!("unexpected token {}", token);
                let mut labels = start.range_label(end);

                Diagnostic::error()
                    .with_labels(
                        labels
                            .drain(..)
                            .map(|l| l.with_message(label_text.clone()))
                            .collect(),
                    )
                    .with_message(headline)
            }

            // simple lexer errors
            ParserError::LexFailure(location) => {
                location.error().with_message("unexpected character")
            }

            // errors without a source location: just relay the message
            ParserError::FileDatabaseError(e) => Diagnostic::error().with_message(e.to_string()),
            ParserError::ReadError(e) => Diagnostic::error().with_message(e.to_string()),
        }
    }
}
impl Program {
    /// Read `file_name` from disk, register it in `file_database`, and parse
    /// its contents as a program.
    ///
    /// The handle returned by the database is used as the file index for
    /// parsing.
    ///
    /// # Errors
    ///
    /// Returns a `ParserError` if the file cannot be read, if the database
    /// lookup fails, or if the contents do not parse.
    pub fn parse_file(
        file_database: &mut SimpleFiles<String, String>,
        file_name: &str,
    ) -> Result<Self, ParserError> {
        let source_text = std::fs::read_to_string(file_name)?;
        let handle = file_database.add(file_name.to_string(), source_text);
        // Parse the copy owned by the database, not a separate buffer.
        let registered = file_database.get(handle)?;
        Self::parse(handle, registered.source())
    }

    /// Lex and parse `buffer` as a complete program.
    ///
    /// `file_idx` is handed to the parser and is also used to build the
    /// locations in any error that is returned.
    ///
    /// # Errors
    ///
    /// Returns the parser's (or lexer's) failure converted to a `ParserError`.
    pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
        // The generated parser consumes `(start, token, end)` triples.
        let tokens = Token::lexer(buffer)
            .spanned()
            .map(|(token, span)| (span.start, token, span.end));
        let outcome = ProgramParser::new().parse(file_idx, tokens);
        outcome.map_err(|raw| ParserError::convert(file_idx, raw))
    }
}
impl Statement {
    /// Lex and parse `buffer` as a single statement.
    ///
    /// `file_idx` is handed to the parser and is also used to build the
    /// locations in any error that is returned.
    ///
    /// # Errors
    ///
    /// Returns the parser's (or lexer's) failure converted to a `ParserError`.
    pub fn parse(file_idx: usize, buffer: &str) -> Result<Statement, ParserError> {
        // The generated parser consumes `(start, token, end)` triples.
        let tokens = Token::lexer(buffer)
            .spanned()
            .map(|(token, span)| (span.start, token, span.end));
        let outcome = StatementParser::new().parse(file_idx, tokens);
        outcome.map_err(|raw| ParserError::convert(file_idx, raw))
    }
}
/// Test-only convenience: parse a program straight from a string, using
/// file index 0.
#[cfg(test)]
impl FromStr for Program {
    type Err = ParserError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // There is no file database in this path, so a fixed index is used.
        const TEST_FILE_IDX: usize = 0;
        Self::parse(TEST_FILE_IDX, s)
    }
}
/// `*` must bind tighter than `+`: `1 + 2 * 3` parses as `1 + (2 * 3)`.
#[test]
fn order_of_operations() {
    let muladd1 = "x = 1 + 2 * 3;";
    let testfile = 0;

    // Build the expected tree bottom-up; each offset is the position of the
    // corresponding token in `muladd1`.
    let one = Expression::Value(Location::new(testfile, 4), Value::Number(None, 1));
    let two = Expression::Value(Location::new(testfile, 8), Value::Number(None, 2));
    let three = Expression::Value(Location::new(testfile, 12), Value::Number(None, 3));
    let product = Expression::Primitive(
        Location::new(testfile, 10),
        "*".to_string(),
        vec![two, three],
    );
    let sum = Expression::Primitive(
        Location::new(testfile, 6),
        "+".to_string(),
        vec![one, product],
    );
    let expected = Program {
        statements: vec![Statement::Binding(
            Location::new(testfile, 0),
            "x".to_string(),
            sum,
        )],
    };

    assert_eq!(Program::from_str(muladd1).unwrap(), expected);
}
// Property-based tests. Each test receives a `Program` generated by proptest
// (presumably via the generators in the `arbitrary` module -- confirm there).
proptest::proptest! {
// Round trip: pretty-printing a program and parsing the rendered text must
// give back an equal program.
#[test]
fn random_render_parses_equal(program: Program) {
let mut file_database = SimpleFiles::new();
// Only used to report a parse failure below.
let writer = ::pretty::termcolor::StandardStream::stderr(::pretty::termcolor::ColorChoice::Auto);
let config = codespan_reporting::term::Config::default();
let allocator = Arena::<()>::new();
let mut out_vector = vec![];
// Render the program at a width of 80 into an in-memory buffer.
prop_assert!(program.pretty(&allocator).render(80, &mut out_vector).is_ok());
let string = std::str::from_utf8(&out_vector).expect("emitted valid string");
// Register the rendered text so a parse error can be displayed against it.
let file_handle = file_database.add("test", string);
let file_db_info = file_database.get(file_handle).expect("find thing just inserted");
let parsed = Program::parse(file_handle, file_db_info.source());
// On failure, print the rendered text and the diagnostic before the
// `unwrap` below panics, so the offending input is visible.
if let Err(e) = &parsed {
eprintln!("failed to parse:\n{}", string);
codespan_reporting::term::emit(&mut writer.lock(), &config, &file_database, &e.into()).unwrap();
}
prop_assert_eq!(program, parsed.unwrap());
}
// Validating a generated program must report no errors; the second element
// of the result returned by `validate` is ignored.
#[test]
fn random_syntaxes_validate(program: Program) {
let (errors, _) = program.validate();
prop_assert!(errors.is_empty());
}
}