diff --git a/Cargo.toml b/Cargo.toml index bc72b48..6cb39f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,9 +13,11 @@ name = "ngrc" path = "src/bin.rs" [dependencies] -lalrpop-util = "0.19.0" -lazy_static = "1.4.0" -logos = "0.11.4" +clap = { version = "^3.0.14", features = ["derive"] } +lalrpop-util = "^0.19.7" +lazy_static = "^1.4.0" +logos = "^0.12.0" +thiserror = "^1.0.30" [build-dependencies] -lalrpop = "0.19.0" +lalrpop = "^0.19.7" diff --git a/examples/basic/test1.ngr b/examples/basic/test1.ngr new file mode 100644 index 0000000..b8d66e1 --- /dev/null +++ b/examples/basic/test1.ngr @@ -0,0 +1,4 @@ +x = 5; +y = 4*x + 3; +print x; +print y; diff --git a/src/bin.rs b/src/bin.rs index e7a11a9..d475914 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,3 +1,29 @@ -fn main() { - println!("Hello, world!"); +use clap::Parser; +use ngr::syntax::{ParserError, Program}; + +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct CommandLineArguments { + /// Optional output file name + #[clap(short, long)] + output: Option, + + /// The file to parse + file: String +} + +fn real_main() -> Result<(), ParserError> { + let args = CommandLineArguments::parse(); + + let program = Program::from_file(&args.file)?; + println!("args: {:?}", args); + println!("program: {:?}", program); + + Ok(()) +} + +fn main() { + if let Err(e) = real_main() { + println!("{}", e); + } } diff --git a/src/syntax.rs b/src/syntax.rs index d85d5a6..0fe4cd0 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -12,16 +12,62 @@ use crate::syntax::tokens::Token; #[cfg(test)] use crate::util::istring::InternedString; use lalrpop_util::ParseError; +use std::fmt; use std::fs; use std::io; use std::str::FromStr; +use thiserror::Error; -#[derive(Debug)] +#[derive(Debug, Error)] pub enum ParserError { IOError(io::Error), ParseError(ParseError), } +impl fmt::Display for ParserError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ParserError::IOError(e) => write!(f, "IO error: {}", e), + + ParserError::ParseError(ParseError::ExtraToken{ token: (loc, tok, _)}) => { + write!(f, "{}: Unexpected additional token ({}) found at end of file", loc, tok) + } + ParserError::ParseError(ParseError::InvalidToken { location }) => { + write!(f, "{}: Unexpected token encountered", location) + } + ParserError::ParseError(ParseError::UnrecognizedEOF { location, expected }) => { + write!(f, "{}: Unexpected EOF{}", location, display_expected(expected)) + } + ParserError::ParseError(ParseError::UnrecognizedToken { token: (location, tok, _), expected }) => { + write!(f, "{}: Unexpected token {}{}", location, tok, display_expected(expected)) + } + ParserError::ParseError(ParseError::User{ error }) => { + write!(f, "{}: Couldn't process input (lexer error)", error.location) + } + } + } +} + +fn display_expected(expected: &Vec) -> String { + match expected.len() { + 0 => "".to_string(), + 1 => format!("; expected {}", expected[0]), + 2 => format!("; expected {} or {}", expected[0], expected[1]), + n => format!("; expected {}or {}", comma_separate(&expected[0..n-1]), expected[n-1]) + } +} + +fn comma_separate(strings: &[String]) -> String { + let mut result = String::new(); + + for s in strings.iter() { + result.push_str(&s); + result.push_str(", "); + } + + result +} + impl From for ParserError { fn from(x: io::Error) -> Self { ParserError::IOError(x) diff --git a/src/syntax/token_stream.rs b/src/syntax/token_stream.rs index a59e8a9..b394477 100644 --- a/src/syntax/token_stream.rs +++ b/src/syntax/token_stream.rs @@ -1,6 +1,7 @@ use crate::syntax::tokens::Token; use crate::util::istring::InternedString; use logos::{Logos, SpannedIter}; +use std::fmt; use std::fs::File; use std::io; use std::io::Read; @@ -31,6 +32,15 @@ pub enum Location { Manufactured, } +impl fmt::Display for Location { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Location::InFile(s, off) => write!(f, "{}:{}", s, off), + Location::Manufactured => write!(f, ""), + } + } +} + impl Location { fn new(filename: InternedString, offset: usize) -> Location { Location::InFile(filename, offset) @@ -45,14 +55,12 @@ impl Default for Location { #[derive(Debug, PartialEq)] pub struct LexerError { - filename: InternedString, - offset: usize, + pub location: Location, } -#[cfg(test)] impl LexerError { fn new(filename: InternedString, offset: usize) -> LexerError { - LexerError { filename, offset } + LexerError { location: Location::new(filename, offset) } } } @@ -64,10 +72,7 @@ impl<'s> Iterator for TokenStream<'s> { fn next(&mut self) -> Option { match self.lexer.next() { None => None, - Some((Token::Error, span)) => Some(Err(LexerError { - filename: self.filename, - offset: span.start, - })), + Some((Token::Error, span)) => Some(Err(LexerError::new(self.filename, span.start))), Some((token, span)) => { let start = Location::new(self.filename, span.start); let end = Location::new(self.filename, span.end); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index eec33ef..0f4790b 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -1,18 +1,17 @@ use crate::util::istring::InternedString; use logos::{Lexer, Logos}; +use std::fmt; use std::num::ParseIntError; #[derive(Logos, Clone, Debug, PartialEq)] pub enum Token { - #[regex(r"[ \t\n\f]+", logos::skip)] - #[regex(r"//.*", logos::skip)] #[token("=")] Equals, #[token(";")] Semi, - #[regex(r"[+\-*/]", |v| v.slice().chars().nth(0))] + #[regex(r"[+\-*/]", |v| v.slice().chars().next())] Operator(char), #[regex(r"0b[01]+", |v| parse_number(Some(2), v))] @@ -26,9 +25,29 @@ pub enum Token { Variable(InternedString), #[error] + #[regex(r"[ \t\r\n\f]+", logos::skip)] + #[regex(r"//.*", logos::skip)] Error, } +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Token::Equals => write!(f, "'='"), + Token::Semi => write!(f, "';'"), + Token::Operator(c) => write!(f, "'{}'", c), + Token::Number((None, v)) => write!(f, "'{}'", v), + Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v), + Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v), + Token::Number((Some(10), v)) => write!(f, "'{}'", v), + Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v), + Token::Number((Some(b), v)) => write!(f, "Invalidly-based-number", b, v), + Token::Variable(s) => write!(f, "'{}'", s), + Token::Error => write!(f, ""), + } + } +} + #[cfg(test)] impl Token { pub(crate) fn var(s: &str) -> Token { @@ -36,9 +55,9 @@ impl Token { } } -fn parse_number<'a, 'src>( +fn parse_number( base: Option, - value: &'a Lexer<'src, Token>, + value: &Lexer, ) -> Result<(Option, i128), ParseIntError> { let (radix, strval) = match base { None => (10, value.slice()),