Pick this up again, dust it off, get some stuff connected.

This commit is contained in:
2022-02-13 21:30:06 -08:00
parent 91d5d1b4fd
commit 60e7d9a41d
6 changed files with 122 additions and 20 deletions

View File

@@ -13,9 +13,11 @@ name = "ngrc"
path = "src/bin.rs" path = "src/bin.rs"
[dependencies] [dependencies]
lalrpop-util = "0.19.0" clap = { version = "^3.0.14", features = ["derive"] }
lazy_static = "1.4.0" lalrpop-util = "^0.19.7"
logos = "0.11.4" lazy_static = "^1.4.0"
logos = "^0.12.0"
thiserror = "^1.0.30"
[build-dependencies] [build-dependencies]
lalrpop = "0.19.0" lalrpop = "^0.19.7"

4
examples/basic/test1.ngr Normal file
View File

@@ -0,0 +1,4 @@
x = 5;
y = 4*x + 3;
print x;
print y;

View File

@@ -1,3 +1,29 @@
fn main() { use clap::Parser;
println!("Hello, world!"); use ngr::syntax::{ParserError, Program};
// Command-line interface of the binary, parsed with clap's derive API.
// NOTE: the `///` comments on the fields below are picked up by clap as the
// per-argument help text, so editing them changes the program's `--help` output.
#[derive(Parser, Debug)]
// Bare `author`, `version` and `about` ask clap to fill these in from the
// package metadata; `long_about = None` disables a separate long description.
#[clap(author, version, about, long_about = None)]
struct CommandLineArguments {
/// Optional output file name
// `short, long` exposes this as a flag named after the field (`-o` / `--output`).
// NOTE(review): in the visible code this value is only echoed by `real_main`;
// nothing writes to it yet -- confirm against the rest of the binary.
#[clap(short, long)]
output: Option<String>,
/// The file to parse
// No `#[clap(...)]` attribute, so this is a required positional argument.
file: String
}
/// Runs the program proper: parses the command line, loads the named source
/// file into a `Program`, and dumps both values in their `Debug` form.
///
/// # Errors
///
/// Returns the `ParserError` produced by `Program::from_file`; in that case
/// nothing is printed.
fn real_main() -> Result<(), ParserError> {
    let cli = CommandLineArguments::parse();

    // Load first: a failure here must short-circuit before any output.
    let parsed = Program::from_file(&cli.file)?;

    println!("args: {:?}", cli);
    println!("program: {:?}", parsed);

    Ok(())
}
fn main() {
if let Err(e) = real_main() {
println!("{}", e);
}
} }

View File

@@ -12,16 +12,62 @@ use crate::syntax::tokens::Token;
#[cfg(test)] #[cfg(test)]
use crate::util::istring::InternedString; use crate::util::istring::InternedString;
use lalrpop_util::ParseError; use lalrpop_util::ParseError;
use std::fmt;
use std::fs; use std::fs;
use std::io; use std::io;
use std::str::FromStr; use std::str::FromStr;
use thiserror::Error;
#[derive(Debug)] #[derive(Debug, Error)]
pub enum ParserError { pub enum ParserError {
IOError(io::Error), IOError(io::Error),
ParseError(ParseError<Location, Token, LexerError>), ParseError(ParseError<Location, Token, LexerError>),
} }
impl fmt::Display for ParserError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ParserError::IOError(e) => write!(f, "IO error: {}", e),
ParserError::ParseError(ParseError::ExtraToken{ token: (loc, tok, _)}) => {
write!(f, "{}: Unexpected additional token ({}) found at end of file", loc, tok)
}
ParserError::ParseError(ParseError::InvalidToken { location }) => {
write!(f, "{}: Unexpected token encountered", location)
}
ParserError::ParseError(ParseError::UnrecognizedEOF { location, expected }) => {
write!(f, "{}: Unexpected EOF{}", location, display_expected(expected))
}
ParserError::ParseError(ParseError::UnrecognizedToken { token: (location, tok, _), expected }) => {
write!(f, "{}: Unexpected token {}{}", location, tok, display_expected(expected))
}
ParserError::ParseError(ParseError::User{ error }) => {
write!(f, "{}: Couldn't process input (lexer error)", error.location)
}
}
}
}
/// Formats the parser's list of acceptable tokens as a clause that can be
/// appended directly to an error message.
///
/// * no entries    -> `""`
/// * one entry     -> `"; expected a"`
/// * two entries   -> `"; expected a or b"`
/// * three or more -> `"; expected a, b, or c"`
///
/// Takes a slice rather than `&Vec<String>`; callers holding a `&Vec<String>`
/// keep working through deref coercion.
fn display_expected(expected: &[String]) -> String {
    match expected {
        [] => String::new(),
        [only] => format!("; expected {}", only),
        [first, second] => format!("; expected {} or {}", first, second),
        // `comma_separate` leaves a trailing ", " after every item, which is
        // why "or" follows the placeholder with no space in between.
        [leading @ .., last] => format!("; expected {}or {}", comma_separate(leading), last),
    }
}
/// Concatenates `strings`, following *every* item (including the last) with
/// `", "`. An empty slice yields an empty string.
fn comma_separate(strings: &[String]) -> String {
    strings.iter().fold(String::new(), |mut joined, item| {
        joined.push_str(item);
        joined.push_str(", ");
        joined
    })
}
impl From<io::Error> for ParserError { impl From<io::Error> for ParserError {
fn from(x: io::Error) -> Self { fn from(x: io::Error) -> Self {
ParserError::IOError(x) ParserError::IOError(x)

View File

@@ -1,6 +1,7 @@
use crate::syntax::tokens::Token; use crate::syntax::tokens::Token;
use crate::util::istring::InternedString; use crate::util::istring::InternedString;
use logos::{Logos, SpannedIter}; use logos::{Logos, SpannedIter};
use std::fmt;
use std::fs::File; use std::fs::File;
use std::io; use std::io;
use std::io::Read; use std::io::Read;
@@ -31,6 +32,15 @@ pub enum Location {
Manufactured, Manufactured,
} }
/// Human-readable form of a `Location`: `<file>:<offset>` for positions
/// inside a file, and the literal `<manufactured>` otherwise.
impl fmt::Display for Location {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Location::Manufactured => f.write_str("<manufactured>"),
            Location::InFile(file, offset) => {
                write!(f, "{}:{}", file, offset)
            }
        }
    }
}
impl Location { impl Location {
fn new(filename: InternedString, offset: usize) -> Location { fn new(filename: InternedString, offset: usize) -> Location {
Location::InFile(filename, offset) Location::InFile(filename, offset)
@@ -45,14 +55,12 @@ impl Default for Location {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct LexerError { pub struct LexerError {
filename: InternedString, pub location: Location,
offset: usize,
} }
#[cfg(test)]
impl LexerError { impl LexerError {
fn new(filename: InternedString, offset: usize) -> LexerError { fn new(filename: InternedString, offset: usize) -> LexerError {
LexerError { filename, offset } LexerError { location: Location::new(filename, offset) }
} }
} }
@@ -64,10 +72,7 @@ impl<'s> Iterator for TokenStream<'s> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.lexer.next() { match self.lexer.next() {
None => None, None => None,
Some((Token::Error, span)) => Some(Err(LexerError { Some((Token::Error, span)) => Some(Err(LexerError::new(self.filename, span.start))),
filename: self.filename,
offset: span.start,
})),
Some((token, span)) => { Some((token, span)) => {
let start = Location::new(self.filename, span.start); let start = Location::new(self.filename, span.start);
let end = Location::new(self.filename, span.end); let end = Location::new(self.filename, span.end);

View File

@@ -1,18 +1,17 @@
use crate::util::istring::InternedString; use crate::util::istring::InternedString;
use logos::{Lexer, Logos}; use logos::{Lexer, Logos};
use std::fmt;
use std::num::ParseIntError; use std::num::ParseIntError;
#[derive(Logos, Clone, Debug, PartialEq)] #[derive(Logos, Clone, Debug, PartialEq)]
pub enum Token { pub enum Token {
#[regex(r"[ \t\n\f]+", logos::skip)]
#[regex(r"//.*", logos::skip)]
#[token("=")] #[token("=")]
Equals, Equals,
#[token(";")] #[token(";")]
Semi, Semi,
#[regex(r"[+\-*/]", |v| v.slice().chars().nth(0))] #[regex(r"[+\-*/]", |v| v.slice().chars().next())]
Operator(char), Operator(char),
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))] #[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
@@ -26,9 +25,29 @@ pub enum Token {
Variable(InternedString), Variable(InternedString),
#[error] #[error]
#[regex(r"[ \t\r\n\f]+", logos::skip)]
#[regex(r"//.*", logos::skip)]
Error, Error,
} }
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Token::Equals => write!(f, "'='"),
Token::Semi => write!(f, "';'"),
Token::Operator(c) => write!(f, "'{}'", c),
Token::Number((None, v)) => write!(f, "'{}'", v),
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v),
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
Token::Number((Some(10), v)) => write!(f, "'{}'", v),
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
Token::Number((Some(b), v)) => write!(f, "Invalidly-based-number<base={},val={}>", b, v),
Token::Variable(s) => write!(f, "'{}'", s),
Token::Error => write!(f, "<error>"),
}
}
}
#[cfg(test)] #[cfg(test)]
impl Token { impl Token {
pub(crate) fn var(s: &str) -> Token { pub(crate) fn var(s: &str) -> Token {
@@ -36,9 +55,9 @@ impl Token {
} }
} }
fn parse_number<'a, 'src>( fn parse_number(
base: Option<u8>, base: Option<u8>,
value: &'a Lexer<'src, Token>, value: &Lexer<Token>,
) -> Result<(Option<u8>, i128), ParseIntError> { ) -> Result<(Option<u8>, i128), ParseIntError> {
let (radix, strval) = match base { let (radix, strval) = match base {
None => (10, value.slice()), None => (10, value.slice()),