diff --git a/src/syntax.rs b/src/syntax.rs index 0fdc2e2..063e3e2 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -10,7 +10,7 @@ //! * Taking those tokens, and computing a basic syntax tree from them, //! using our parser ([`ProgramParser`] or [`StatementParser`], generated //! by [`lalrpop`](https://lalrpop.github.io/lalrpop/)). -//! * Validating the tree we have parsed, using the [`validate`] module, +//! * Validating the tree we have parsed, using [`Program::validate`], //! returning any warnings or errors we have found. //! //! In addition to all of this, we make sure that the structures defined in this @@ -38,7 +38,7 @@ lalrpop_mod!( "/syntax/parser.rs" ); mod pretty; -pub mod validate; +mod validate; pub use crate::syntax::ast::*; pub use crate::syntax::location::Location; @@ -53,25 +53,56 @@ use proptest::{prop_assert, prop_assert_eq}; use std::str::FromStr; use thiserror::Error; +/// One of the many errors that can occur when processing text input. +/// +/// If you get one of these and want to display it to the user, we strongly +/// suggest using the [`From`] implementation to turn this into a [`Diagnostic`], +/// and then printing it via [`codespan_reporting`]. #[derive(Debug, Error)] pub enum ParserError { + /// Raised by the lexer when we see some text that doesn't make + /// any sense in the language. #[error("Invalid token")] InvalidToken(Location), + + /// Raised when we're parsing the file and run into an EOF in a + /// place we really weren't expecting. #[error("Unrecognized EOF")] UnrecognizedEOF(Location, Vec), + + /// Raised when we're parsing the file, and run into a token in a + /// place we weren't expecting it. #[error("Unrecognized token")] UnrecognizedToken(Location, Location, Token, Vec), + + /// Raised when we were expecting the end of the file, but instead + /// got another token. #[error("Extra token")] ExtraToken(Location, Token, Location), + + /// Raised when the lexer just had some sort of internal problem + /// and just gave up. 
#[error("Lexing failure")] LexFailure(Location), + + /// Raised when we tried to reference a file, or add a file, to our + /// file database, and the database ran into a problem. #[error("File database error")] FileDatabaseError(#[from] codespan_reporting::files::Error), + + /// Raised when the OS is having problems giving us data. #[error("Read error")] ReadError(#[from] std::io::Error), } impl ParserError { + /// Convert one of lalrpop's parser errors into one of our own, for which we can more + /// easily implement translation into [`Diagnostic`]. + /// + /// This function is relatively straightforward, because we match the errors pretty + /// closely. The major thing we do here is convert [`lalrpop`]'s notion of a location, + /// which is just an offset that it got from the lexer, into an actual location that + /// we can use in our [`Diagnostic`]s. fn convert(file_idx: usize, err: ParseError) -> Self { match err { ParseError::InvalidToken { location } => { @@ -105,6 +136,10 @@ impl ParserError { } } +/// This is just a nice little function to print out what we expected, if +/// we had some expectations. Because English is a little wonky, there's +/// some odd stuff with whether we get 0, 1, 2, or more, and it's nice to +/// just split that bit of logic out. fn display_expected(expected: &[String]) -> String { match expected.len() { 0 => "".to_string(), @@ -118,6 +153,8 @@ fn display_expected(expected: &[String]) -> String { } } +/// Given a list of strings, comma-separate them (with a space), as in an +/// English list. fn comma_separate(strings: &[String]) -> String { let mut result = String::new(); @@ -189,6 +226,14 @@ impl<'a> From<&'a ParserError> for Diagnostic { } impl Program { + /// Parse the given file, adding it to the database as part of the process. + /// + /// This operation reads the file from disk and adds it to the database for future + /// reference. 
If you get an error, we strongly suggest conversion to [`Diagnostic`] + /// and then reporting it to the user via [`codespan_reporting`]. You should use + /// this function if you're pretty sure that you've never seen this file before, + /// and [`Program::parse`] if you have and know its index and already have it in + /// memory. pub fn parse_file( file_database: &mut SimpleFiles, file_name: &str, @@ -199,6 +244,11 @@ impl Program { Program::parse(file_handle, file_db_info.source()) } + /// Parse a block of text you have in memory, using the given index for [`Location`]s. + /// + /// If you use a nonsensical file index, everything will work fine until you try to + /// report an error, at which point [`codespan_reporting`] may have some nasty things + /// to say to you. pub fn parse(file_idx: usize, buffer: &str) -> Result { let lexer = Token::lexer(buffer) .spanned() @@ -210,6 +260,12 @@ impl Program { } impl Statement { + /// Parse a statement that you have in memory, using the given index for [`Location`]s. + /// + /// As with [`Program::parse`], if you use a bad file index, you'll get weird behaviors + /// when you try to print errors, but things should otherwise work fine. This function + /// will only parse a single statement, which is useful in the REPL, but probably shouldn't + /// be used when reading in whole files. pub fn parse(file_idx: usize, buffer: &str) -> Result { let lexer = Token::lexer(buffer) .spanned() diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index ad28025..ee84be4 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,12 +1,32 @@ use crate::syntax::Location; +/// The set of valid binary operators. pub static BINARY_OPERATORS: &[&str] = &["+", "-", "*", "/"]; +/// A structure representing a parsed program. +/// +/// One `Program` is associated with exactly one input file, and the +/// vector is arranged in exactly the same order as the parsed file. 
+/// Because this is the syntax layer, the program is guaranteed to be +/// syntactically valid, but may be nonsense. There could be attempts +/// to use unbound variables, for example, until after someone runs +/// `validate` and it comes back without errors. #[derive(Clone, Debug, PartialEq)] pub struct Program { pub statements: Vec, } +/// A parsed statement. +/// +/// Statements are guaranteed to be syntactically valid, but may be +/// complete nonsense at the semantic level. Which is to say, all the +/// print statements were correctly formatted, and all the variables +/// referenced are definitely valid symbols, but they may not have +/// been defined or anything. +/// +/// Note that equivalence testing on statements is independent of +/// source location; it is testing if the two statements say the same +/// thing, not if they are the exact same statement. #[derive(Clone, Debug)] pub enum Statement { Binding(Location, String, Expression), @@ -28,6 +48,12 @@ impl PartialEq for Statement { } } +/// An expression in the underlying syntax. +/// +/// Like statements, these expressions are guaranteed to have been +/// formatted correctly, but may not actually make any sense. Also +/// like Statements, the [`PartialEq`] implementation does not take +/// source positions into account. #[derive(Clone, Debug)] pub enum Expression { Value(Location, Value), @@ -54,7 +80,9 @@ impl PartialEq for Expression { } } +/// A value from the source syntax #[derive(Clone, Debug, PartialEq, Eq)] pub enum Value { + /// The value of the number, and an optional base that it was written in Number(Option, i64), } diff --git a/src/syntax/eval.rs b/src/syntax/eval.rs index 15e7b85..84ac89f 100644 --- a/src/syntax/eval.rs +++ b/src/syntax/eval.rs @@ -4,11 +4,23 @@ use crate::eval::{EvalEnvironment, EvalError, Value}; use crate::syntax::{Expression, Program, Statement}; impl Program { + /// Evaluate the program, returning either an error or what it prints out when run. 
+ /// + /// Doing this evaluation is particularly useful for testing, to ensure that if we + /// modify a program in some way it does the same thing on both sides of the + /// transformation. It's also sometimes just nice to know what a program will be + /// doing. + /// + /// Note that the errors here are slightly more strict than what we enforce at runtime. + /// For example, we check for overflow and underflow errors during evaluation, and + /// we don't check for those in the compiled code. pub fn eval(&self) -> Result { let mut env = EvalEnvironment::empty(); let mut stdout = String::new(); for stmt in self.statements.iter() { + // at this point, evaluation is pretty simple. just walk through each + // statement, in order, and record printouts as we come to them. match stmt { Statement::Binding(_, name, value) => { let actual_value = value.eval(&env)?; @@ -40,6 +52,7 @@ impl Expression { let mut arg_values = Vec::with_capacity(args.len()); for arg in args.iter() { + // yay, recursion! makes this pretty straightforward arg_values.push(arg.eval(env)?); } diff --git a/src/syntax/validate.rs b/src/syntax/validate.rs index da2410c..4c2fc5c 100644 --- a/src/syntax/validate.rs +++ b/src/syntax/validate.rs @@ -2,6 +2,13 @@ use crate::syntax::{Expression, Location, Program, Statement}; use codespan_reporting::diagnostic::Diagnostic; use std::collections::HashMap; +/// An error we found while validating the input program. +/// +/// These errors indicate that we should stop trying to compile +/// the program, because it's just fundamentally broken in a way +/// that we're not going to be able to work through. As with most +/// of these errors, we recommend converting this to a [`Diagnostic`] +/// and using [`codespan_reporting`] to present it to the user. pub enum Error { UnboundVariable(Location, String), } @@ -16,6 +23,13 @@ impl From for Diagnostic { } } +/// A problem we found validating the input that isn't critical. 
+/// +/// These are things that the user might want to do something about, +/// but we can keep going without it being a problem. As with most of +/// these things, if you want to present this information to the user, +/// the best way to do so is via [`From`] and [`Diagnostic`], and then +/// interactions via [`codespan_reporting`]. #[derive(Debug, PartialEq, Eq)] pub enum Warning { ShadowedVariable(Location, Location, String), @@ -37,6 +51,11 @@ impl From for Diagnostic { } impl Program { + /// Validate that the program makes semantic sense, not just syntactic sense. + /// + /// This checks for things like references to variables that don't exist, for + /// example, and generates warnings for things that are inadvisable but not + /// actually a problem. pub fn validate(&self) -> (Vec, Vec) { let mut errors = vec![]; let mut warnings = vec![]; @@ -53,6 +72,15 @@ impl Program { } impl Statement { + /// Validate that the statement makes semantic sense, not just syntactic sense. + /// + /// This checks for things like references to variables that don't exist, for + /// example, and generates warnings for things that are inadvisable but not + /// actually a problem. Since statements appear in a broader context, you'll + /// need to provide the set of variables that are bound where this statement + /// occurs. We use a `HashMap` to map these bound variables to the locations + /// where they're bound, because these locations are handy when generating errors + /// and warnings. pub fn validate( &self, bound_variables: &mut HashMap,