📜 Add better documentation across the compiler. (#3)
These changes pay particular attention to API endpoints, to try to ensure that any rustdocs generated are detailed and sensible. A good next step, eventually, might be to include doctest examples as well; for the moment, it's not clear that they would provide a lot of value. In addition, this does a couple of refactors to simplify the code base in ways that make things clearer or, at least, briefer.
@@ -1,12 +1,36 @@
+//! NGR Parsing: Reading input, turning it into sense (or errors).
+//!
+//! This module implements the front end of the compiler, which is responsible for
+//! reading in NGR syntax as a string, turning it into a series of reasonable Rust
+//! structures for us to manipulate, and doing some validation while it's at it.
+//!
+//! The core flow for this work is:
+//!
+//! * Turning the string into a series of language-specific [`Token`]s.
+//! * Taking those tokens, and computing a basic syntax tree from them,
+//!   using our parser ([`ProgramParser`] or [`StatementParser`], generated
+//!   by [`lalrpop`](https://lalrpop.github.io/lalrpop/)).
+//! * Validating the tree we have parsed, using [`Program::validate`],
+//!   returning any warnings or errors we have found.
+//!
+//! In addition to all of this, we make sure that the structures defined in this
+//! module are all:
+//!
+//! * Instances of [`Pretty`](::pretty::Pretty), so that you can print stuff back
+//!   out that can be read by a human.
+//! * Instances of [`Arbitrary`](proptest::prelude::Arbitrary), so they can be
+//!   used in `proptest`-based property testing. There are built-in tests in
+//!   the library, for example, to make sure that the pretty-printing round-trips.
+//! * Able to be evaluated using an `eval` function, for comparison with later
+//!   versions of the function downstream.
 use codespan_reporting::{diagnostic::Diagnostic, files::SimpleFiles};
 use lalrpop_util::lalrpop_mod;
 use logos::Logos;
 
 mod arbitrary;
-pub mod ast;
+mod ast;
 mod eval;
 mod location;
 mod simplify;
 mod tokens;
 lalrpop_mod!(
     #[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
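The lex → parse → validate flow described in the new module docs can be sketched in miniature. This is an illustrative, self-contained toy, not the crate's real API: the real pipeline uses `logos`-generated tokens and a `lalrpop`-generated parser, while every name below (`Token`, `tokenize`, `parse`, `validate`) is a stand-in.

```rust
// A toy three-stage front end, mirroring the flow in the module docs above.

#[derive(Debug, PartialEq)]
enum Token {
    Num(i64),
    Plus,
}

// Stage 1: turn the input string into a series of language-specific tokens.
fn tokenize(input: &str) -> Result<Vec<Token>, String> {
    input
        .split_whitespace()
        .map(|word| match word {
            "+" => Ok(Token::Plus),
            n => n
                .parse()
                .map(Token::Num)
                .map_err(|_| format!("invalid token: {n}")),
        })
        .collect()
}

// Stage 2: build a (tiny) syntax tree from the tokens. Here the "tree" is
// just the list of operands in an addition chain like `1 + 2 + 3`.
fn parse(tokens: &[Token]) -> Result<Vec<i64>, String> {
    let mut operands = Vec::new();
    let mut expect_num = true;
    for tok in tokens {
        match (tok, expect_num) {
            (Token::Num(n), true) => {
                operands.push(*n);
                expect_num = false;
            }
            (Token::Plus, false) => expect_num = true,
            _ => return Err("unrecognized token".to_string()),
        }
    }
    if expect_num {
        // Ran out of input where an operand was required: an "unrecognized EOF".
        return Err("unrecognized EOF".to_string());
    }
    Ok(operands)
}

// Stage 3: validate the parsed tree, returning any warnings we have found.
fn validate(operands: &[i64]) -> Vec<String> {
    operands
        .iter()
        .filter(|n| **n == 0)
        .map(|_| "adding zero has no effect".to_string())
        .collect()
}
```

The error cases in stage 2 deliberately parallel two of the `ParserError` variants further down in this file: an unexpected token, and an unexpected end of input.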
@@ -18,7 +42,7 @@ mod validate;
 
 pub use crate::syntax::ast::*;
 pub use crate::syntax::location::Location;
-use crate::syntax::parser::ProgramParser;
+pub use crate::syntax::parser::{ProgramParser, StatementParser};
 pub use crate::syntax::tokens::{LexerError, Token};
 #[cfg(test)]
 use ::pretty::{Arena, Pretty};
@@ -29,33 +53,62 @@ use proptest::{prop_assert, prop_assert_eq};
 use std::str::FromStr;
 use thiserror::Error;
 
-use self::parser::StatementParser;
-
+/// One of the many errors that can occur when processing text input.
+///
+/// If you get one of these and want to display it to the user, we strongly
+/// suggest using the [`From`] implementation to turn this into a [`Diagnostic`],
+/// and then printing it via [`codespan_reporting`].
 #[derive(Debug, Error)]
 pub enum ParserError {
+    /// Raised by the lexer when we see some text that doesn't make
+    /// any sense in the language.
     #[error("Invalid token")]
     InvalidToken(Location),
 
+    /// Raised when we're parsing the file and run into an EOF in a
+    /// place we really weren't expecting.
     #[error("Unrecognized EOF")]
     UnrecognizedEOF(Location, Vec<String>),
 
+    /// Raised when we're parsing the file, and run into a token in a
+    /// place we weren't expecting it.
     #[error("Unrecognized token")]
     UnrecognizedToken(Location, Location, Token, Vec<String>),
 
+    /// Raised when we were expecting the end of the file, but instead
+    /// got another token.
     #[error("Extra token")]
     ExtraToken(Location, Token, Location),
 
+    /// Raised when the lexer just had some sort of internal problem
+    /// and just gave up.
     #[error("Lexing failure")]
     LexFailure(Location),
 
+    /// Raised when we tried to reference a file, or add a file to our
+    /// file database, and the database ran into a problem.
     #[error("File database error")]
     FileDatabaseError(#[from] codespan_reporting::files::Error),
 
+    /// Raised when the OS is having problems giving us data.
     #[error("Read error")]
     ReadError(#[from] std::io::Error),
 }
 
 impl ParserError {
+    /// Convert one of lalrpop's parser errors into one of our own, for which we can
+    /// more easily implement translation into [`Diagnostic`].
+    ///
+    /// This function is relatively straightforward, because we match the errors pretty
+    /// closely. The major thing we do here is convert [`lalrpop`]'s notion of a location,
+    /// which is just an offset that it got from the lexer, into an actual location that
+    /// we can use in our [`Diagnostic`]s.
     fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
         match err {
             ParseError::InvalidToken { location } => {
                 ParserError::InvalidToken(Location::new(file_idx, location))
             }
-            ParseError::UnrecognizedEOF { location, expected } => {
+            ParseError::UnrecognizedEof { location, expected } => {
                 ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
             }
             ParseError::UnrecognizedToken {
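The `convert` docs above single out the one real bit of work: turning lalrpop's raw byte offset into a location usable in diagnostics. A standalone sketch of that kind of conversion follows; the crate's actual `Location::new` takes a file index and defers range math to `codespan_reporting`, so treat this as illustrative only.

```rust
/// Convert a raw byte offset, as reported by the lexer/parser, into a
/// 1-based (line, column) pair. Columns are counted in bytes here; a real
/// implementation may want character- or grapheme-aware columns instead.
fn line_and_column(source: &str, offset: usize) -> (usize, usize) {
    let clamped = offset.min(source.len());
    let upto = &source[..clamped];
    // One more line than the number of newlines we have passed.
    let line = upto.matches('\n').count() + 1;
    // Column is the distance from the last newline (or from the start).
    let column = clamped - upto.rfind('\n').map_or(0, |nl| nl + 1) + 1;
    (line, column)
}
```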
@@ -83,6 +136,10 @@ impl ParserError {
     }
 }
 
+/// This is just a nice little function to print out what we expected, if
+/// we had some expectations. Because English is a little wonky, there's
+/// some odd stuff with whether we get 0, 1, 2, or more, and it's nice to
+/// just split that bit of logic out.
 fn display_expected(expected: &[String]) -> String {
     match expected.len() {
         0 => "".to_string(),
@@ -96,6 +153,8 @@ fn display_expected(expected: &[String]) -> String {
     }
 }
 
+/// Given a list of strings, comma separate (with a space) them, as in an
+/// English list.
 fn comma_separate(strings: &[String]) -> String {
     let mut result = String::new();
 
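The bodies of `display_expected` and `comma_separate` fall mostly outside these hunks, but the doc comments describe the 0/1/2-or-more wonkiness they handle. A hypothetical standalone version of that logic, assuming an Oxford-comma style; the crate's actual output format may well differ:

```rust
/// Join strings as an English list: "a", "a and b", "a, b, and c".
fn comma_separate(strings: &[String]) -> String {
    match strings {
        [] => String::new(),
        [one] => one.clone(),
        [first, second] => format!("{first} and {second}"),
        // Three or more: comma-separate the initial items, then "and" the last.
        [init @ .., last] => format!("{}, and {last}", init.join(", ")),
    }
}

/// Describe what the parser expected, handling 0, 1, or more items.
/// The leading "; " assumes the result is appended to a message like
/// "unexpected token {}" -- an assumption, not the crate's real format.
fn display_expected(expected: &[String]) -> String {
    match expected.len() {
        0 => String::new(),
        1 => format!("; expected {}", expected[0]),
        _ => format!("; expected one of {}", comma_separate(expected)),
    }
}
```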
@@ -125,12 +184,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
                 let expected_str =
                     format!("unexpected token {}{}", token, display_expected(expected));
-                let mut labels = start.range_label(end);
+                let unexpected_str = format!("unexpected token {}", token);
+                let labels = start.range_label(end);
 
                 Diagnostic::error()
                     .with_labels(
                         labels
-                            .drain(..)
                             .into_iter()
                             .map(|l| l.with_message(unexpected_str.clone()))
                             .collect(),
                     )
@@ -142,12 +201,12 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
                 let expected_str =
                     format!("unexpected token {} after the expected end of file", token);
-                let mut labels = start.range_label(end);
+                let unexpected_str = format!("unexpected token {}", token);
+                let labels = start.range_label(end);
 
                 Diagnostic::error()
                     .with_labels(
                         labels
-                            .drain(..)
                             .into_iter()
                             .map(|l| l.with_message(unexpected_str.clone()))
                             .collect(),
                     )
@@ -167,6 +226,14 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
 }
 
 impl Program {
+    /// Parse the given file, adding it to the database as part of the process.
+    ///
+    /// This operation reads the file from disk and adds it to the database for future
+    /// reference. If you get an error, we strongly suggest conversion to [`Diagnostic`]
+    /// and then reporting it to the user via [`codespan_reporting`]. You should use
+    /// this function if you're pretty sure that you've never seen this file before,
+    /// and [`Program::parse`] if you've already seen it, know its index, and have it
+    /// in memory.
     pub fn parse_file(
         file_database: &mut SimpleFiles<String, String>,
         file_name: &str,
@@ -177,6 +244,11 @@ impl Program {
         Program::parse(file_handle, file_db_info.source())
     }
 
+    /// Parse a block of text you have in memory, using the given index for [`Location`]s.
+    ///
+    /// If you use a nonsensical file index, everything will work fine until you try to
+    /// report an error, at which point [`codespan_reporting`] may have some nasty things
+    /// to say to you.
     pub fn parse(file_idx: usize, buffer: &str) -> Result<Program, ParserError> {
         let lexer = Token::lexer(buffer)
             .spanned()
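`Program::parse` above funnels lexer, parser, file-database, and IO failures through the single `ParserError` enum. A minimal standalone sketch of that pattern, hand-writing the `Display` and `From` impls that `thiserror`'s `#[error(...)]` and `#[from]` attributes generate; `FrontEndError` and its variants are illustrative names, not the crate's.

```rust
use std::fmt;

// One error enum for the whole front end, so every entry point can return
// a single error type. The real `ParserError` also carries `Location`s.
#[derive(Debug)]
enum FrontEndError {
    InvalidToken(usize),        // byte offset of the offending text
    UnexpectedEof(Vec<String>), // what we expected instead
    Io(std::io::Error),
}

impl fmt::Display for FrontEndError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            FrontEndError::InvalidToken(at) => write!(f, "invalid token at byte {at}"),
            FrontEndError::UnexpectedEof(_) => write!(f, "unrecognized EOF"),
            FrontEndError::Io(e) => write!(f, "read error: {e}"),
        }
    }
}

// `#[from]` generates exactly this kind of impl; it is what lets `?`
// convert IO errors automatically inside a function like `parse_file`.
impl From<std::io::Error> for FrontEndError {
    fn from(e: std::io::Error) -> Self {
        FrontEndError::Io(e)
    }
}
```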
@@ -188,6 +260,12 @@ impl Program {
 }
 
 impl Statement {
+    /// Parse a statement that you have in memory, using the given index for [`Location`]s.
+    ///
+    /// As with [`Program::parse`], if you use a bad file index, you'll get weird behaviors
+    /// when you try to print errors, but things should otherwise work fine. This function
+    /// will only parse a single statement, which is useful in the REPL, but probably shouldn't
+    /// be used when reading in whole files.
     pub fn parse(file_idx: usize, buffer: &str) -> Result<Statement, ParserError> {
         let lexer = Token::lexer(buffer)
             .spanned()
 