📜 Add better documentation across the compiler. (#3)

These changes pay particular attention to the public API, to try to
ensure that any rustdocs generated are detailed and sensible. A good
next step, eventually, might be to include doctest examples as well,
though for the moment it's not clear they would provide much value.

In addition, this does a couple of refactors that simplify the code base
in ways that make things clearer or, at least, briefer.
2023-05-13 14:34:48 -05:00
parent f4594bf2cc
commit 1fbfd0c2d2
28 changed files with 1550 additions and 432 deletions


@@ -1,12 +1,36 @@
//! NGR Parsing: Reading input, turning it into sense (or errors).
//!
//! This module implements the front end of the compiler, which is responsible for
//! reading in NGR syntax as a string, turning it into a series of reasonable Rust
//! structures for us to manipulate, and doing some validation while it's at it.
//!
//! The core flow for this work is:
//!
//! * Turning the string into a series of language-specific [`Token`]s.
//! * Taking those tokens, and computing a basic syntax tree from them,
//! using our parser ([`ProgramParser`] or [`StatementParser`], generated
//! by [`lalrpop`](https://lalrpop.github.io/lalrpop/)).
//! * Validating the tree we have parsed, using [`Program::validate`],
//! returning any warnings or errors we have found.
//!
//! In addition to all of this, we make sure that the structures defined in this
//! module are all:
//!
//! * Instances of [`Pretty`](::pretty::Pretty), so that you can print structures
//! back out in a form a human can read.
//! * Instances of [`Arbitrary`](proptest::prelude::Arbitrary), so they can be
//! used in `proptest`-based property testing. There are built-in tests in
//! the library, for example, to make sure that the pretty-printing round-trips.
//! * Evaluable using an `eval` function, for comparison with later versions
//! of the function downstream.
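//!
//! As a quick sketch of that flow (the source text below is a stand-in, since
//! this comment doesn't assume any particular NGR syntax, and the exact shape
//! of `validate`'s return value is elided):
//!
//! ```ignore
//! use codespan_reporting::files::SimpleFiles;
//!
//! let mut files = SimpleFiles::new();
//! let src = "...".to_string(); // some NGR program text
//! let idx = files.add("main.ngr".to_string(), src.clone());
//!
//! // Lexing and parsing both happen inside `Program::parse`.
//! let program = Program::parse(idx, &src)?;
//!
//! // Validation reports any warnings or errors we found.
//! let diagnostics = program.validate();
//! ```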
use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles};
use lalrpop_util::lalrpop_mod;
use logos::Logos;
mod arbitrary;
-pub mod ast;
+mod ast;
mod eval;
mod location;
mod simplify;
mod tokens;
lalrpop_mod!(
#[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
@@ -18,7 +42,7 @@ mod validate;
pub use crate::syntax::ast::*;
pub use crate::syntax::location::Location;
-use crate::syntax::parser::ProgramParser;
+pub use crate::syntax::parser::{ProgramParser, StatementParser};
pub use crate::syntax::tokens::{LexerError, Token};
#[cfg(test)]
use ::pretty::{Arena, Pretty};
@@ -29,33 +53,62 @@ use proptest::{prop_assert, prop_assert_eq};
use std::str::FromStr;
use thiserror::Error;
-use self::parser::StatementParser;
/// One of the many errors that can occur when processing text input.
///
/// If you get one of these and want to display it to the user, we strongly
/// suggest using the [`From`] implementation to turn this into a [`Diagnostic`],
/// and then printing it via [`codespan_reporting`].
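///
/// A sketch of that reporting path (the terminal setup below is an assumption
/// on our part, not something this crate requires):
///
/// ```ignore
/// use codespan_reporting::term;
/// use codespan_reporting::term::termcolor::{ColorChoice, StandardStream};
///
/// let diagnostic = Diagnostic::from(&err);
/// let writer = StandardStream::stderr(ColorChoice::Auto);
/// term::emit(&mut writer.lock(), &term::Config::default(), &files, &diagnostic)?;
/// ```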
#[derive(Debug, Error)]
pub enum ParserError {
/// Raised by the lexer when we see some text that doesn't make
/// any sense in the language.
#[error("Invalid token")]
InvalidToken(Location),
/// Raised when we're parsing the file and run into an EOF in a
/// place we really weren't expecting.
#[error("Unrecognized EOF")]
UnrecognizedEOF(Location, Vec<String>),
/// Raised when we're parsing the file and run into a token in a
/// place we weren't expecting.
#[error("Unrecognized token")]
UnrecognizedToken(Location, Location, Token, Vec<String>),
/// Raised when we were expecting the end of the file, but instead
/// got another token.
#[error("Extra token")]
ExtraToken(Location, Token, Location),
/// Raised when the lexer had some sort of internal problem
/// and simply gave up.
#[error("Lexing failure")]
LexFailure(Location),
/// Raised when we tried to reference or add a file in our
/// file database, and the database ran into a problem.
#[error("File database error")]
FileDatabaseError(#[from] codespan_reporting::files::Error),
/// Raised when the OS is having problems giving us data.
#[error("Read error")]
ReadError(#[from] std::io::Error),
}
impl ParserError {
/// Convert one of lalrpop's parser errors into one of our own, for which we can
/// more easily implement translation into [`Diagnostic`].
///
/// This function is relatively straightforward, because we match the errors pretty
/// closely. The major thing we do here is convert [`lalrpop`]'s notion of a location,
/// which is just an offset that it got from the lexer, into an actual location that
/// we can use in our [`Diagnostic`]s.
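///
/// For instance (the byte offset here is invented for illustration):
///
/// ```ignore
/// let raw = ParseError::InvalidToken { location: 7 };
/// let err = ParserError::convert(file_idx, raw);
/// // => ParserError::InvalidToken(Location::new(file_idx, 7))
/// ```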
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
match err {
ParseError::InvalidToken { location } => {
ParserError::InvalidToken(Location::new(file_idx, location))
}
-ParseError::UnrecognizedEOF { location, expected } => {
+ParseError::UnrecognizedEof { location, expected } => {
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
}
ParseError::UnrecognizedToken {
@@ -83,6 +136,10 @@ impl ParserError {
}
}
/// A small helper to print out what we expected, if we had any expectations.
/// Because English is a little wonky, the wording differs depending on whether
/// we have 0, 1, 2, or more items, so it's nice to split that bit of logic out.
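///
/// As a quick sketch (only the empty case is pinned down here; the wording
/// for the others lives in the match below):
///
/// ```ignore
/// assert_eq!(display_expected(&[]), "");
/// // With one or more expectations, the result names them, something
/// // like ", expected one of ..." (exact phrasing per the match arms).
/// ```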
fn display_expected(expected: &[String]) -> String {
match expected.len() {
0 => "".to_string(),
@@ -96,6 +153,8 @@ fn display_expected(expected: &[String]) -> String {
}
}
/// Given a list of strings, join them with a comma and a space, as in an
/// English list.
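///
/// For example (assuming a plain comma-and-space join, per the description above):
///
/// ```ignore
/// let items = vec!["a".to_string(), "b".to_string(), "c".to_string()];
/// assert_eq!(comma_separate(&items), "a, b, c");
/// ```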
fn comma_separate(strings: &[String]) -> String {
let mut result = String::new();
@@ -125,12 +184,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
let expected_str =
format!("unexpected token {}{}", token, display_expected(expected));
let unexpected_str = format!("unexpected token {}", token);
-let mut labels = start.range_label(end);
+let labels = start.range_label(end);
Diagnostic::error()
.with_labels(
labels
-.drain(..)
+.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
@@ -142,12 +201,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
let expected_str =
format!("unexpected token {} after the expected end of file", token);
let unexpected_str = format!("unexpected token {}", token);
-let mut labels = start.range_label(end);
+let labels = start.range_label(end);
Diagnostic::error()
.with_labels(
labels
-.drain(..)
+.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
@@ -167,6 +226,14 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
}
impl Program {
/// Parse the given file, adding it to the database as part of the process.
///
/// This operation reads the file from disk and adds it to the database for future
/// reference. If you get an error, we strongly suggest conversion to [`Diagnostic`]
/// and then reporting it to the user via [`codespan_reporting`]. Use this
/// function if you're pretty sure you've never seen this file before; if you
/// already have it in memory and know its index, use [`Program::parse`]
/// instead.
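///
/// A usage sketch (the file name is a placeholder):
///
/// ```ignore
/// let mut files = SimpleFiles::new();
/// let program = Program::parse_file(&mut files, "main.ngr")?;
/// ```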
pub fn parse_file(
file_database: &mut SimpleFiles<String, String>,
file_name: &str,
@@ -177,6 +244,11 @@ impl Program {
Program::parse(file_handle, file_db_info.source())
}
/// Parse a block of text you have in memory, using the given index for [`Location`]s.
///
/// If you use a nonsensical file index, everything will work fine until you try to
/// report an error, at which point [`codespan_reporting`] may have some nasty things
/// to say to you.
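///
/// A minimal sketch (the source text is a stand-in; the point is that
/// `file_idx` should be the index the database handed back):
///
/// ```ignore
/// let idx = file_database.add("main.ngr".to_string(), source.clone());
/// let program = Program::parse(idx, &source)?;
/// ```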
pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
let lexer = Token::lexer(buffer)
.spanned()
@@ -188,6 +260,12 @@ impl Program {
}
impl Statement {
/// Parse a statement that you have in memory, using the given index for [`Location`]s.
///
/// As with [`Program::parse`], if you use a bad file index, you'll get weird behavior
/// when you try to print errors, but things should otherwise work fine. This function
/// parses only a single statement, which is useful in the REPL, but it probably
/// shouldn't be used when reading in whole files.
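///
/// A REPL-flavored sketch (the `files` database and the `line` being read are
/// assumed bindings from the surrounding loop):
///
/// ```ignore
/// let idx = files.add("<repl>".to_string(), line.clone());
/// match Statement::parse(idx, &line) {
///     Ok(statement) => { /* evaluate or print it */ }
///     Err(err) => { /* convert to a Diagnostic and report it */ }
/// }
/// ```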
pub fn parse(file_idx: usize, buffer: &str) -> Result<Statement, ParserError> {
let lexer = Token::lexer(buffer)
.spanned()