diff --git a/Cargo.toml b/Cargo.toml index ada12c3..bc72b48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,8 +13,9 @@ name = "ngrc" path = "src/bin.rs" [dependencies] -logos = "0.11.4" lalrpop-util = "0.19.0" +lazy_static = "1.4.0" +logos = "0.11.4" [build-dependencies] lalrpop = "0.19.0" diff --git a/src/lib.rs b/src/lib.rs index 4a39d2c..ff3ff0a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,2 @@ pub mod syntax; +pub mod util; diff --git a/src/syntax.rs b/src/syntax.rs index f13fc07..9264712 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,5 +1,40 @@ -//use lalrpop_util::lalrpop_mod; +use lalrpop_util::lalrpop_mod; -pub mod tokens; -//lalrpop_mod!(pub parser); -pub mod ast; +mod tokens; +mod token_stream; +lalrpop_mod!(parser, "/syntax/parser.rs"); +mod ast; + +pub use crate::syntax::ast::*; +use crate::syntax::parser::ProgramParser; +use crate::syntax::tokens::Token; +use crate::syntax::token_stream::{LexerError, Location, TokenStream}; +use lalrpop_util::ParseError; +use std::fs; +use std::io; + +pub enum ParserError { +    IOError(io::Error), +    ParseError(ParseError<Location, Token, LexerError>), +} + +impl From<io::Error> for ParserError { +    fn from(x: io::Error) -> Self { +        ParserError::IOError(x) +    } +} + +impl From<ParseError<Location, Token, LexerError>> for ParserError { +    fn from(x: ParseError<Location, Token, LexerError>) -> Self { +        ParserError::ParseError(x) +    } +} + +impl Program { +    pub fn from_file(filename: &str) -> Result<Program, ParserError> { +        let metadata = fs::metadata(filename)?; +        let mut buffer = String::with_capacity(metadata.len() as usize); +        let lexer = TokenStream::from_file(filename, &mut buffer)?; +        Ok(ProgramParser::new().parse(lexer)?)
+    } +} \ No newline at end of file diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 85192a5..c9c7e7a 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,12 +1,19 @@ -pub enum Stmt { -    Binding(String, Expr), -    Expr(Expr), +use crate::syntax::token_stream::Location; + +pub struct Program { +    pub statements: Vec<Statement>, +    pub result: Expression, } -pub enum Expr { -    Value(Value), -    Reference(String), -    Primitive(String, Vec<Expr>), +pub enum Statement { +    Binding(Location, String, Expression), +    Expr(Location, Expression), +} + +pub enum Expression { +    Value(Location, Value), +    Reference(Location, String), +    Primitive(Location, String, Vec<Expression>), } pub enum Value { diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop new file mode 100644 index 0000000..9e67714 --- /dev/null +++ b/src/syntax/parser.lalrpop @@ -0,0 +1,70 @@ +use crate::syntax::ast::{Program,Statement,Expression,Value}; +use crate::syntax::tokens::Token; +use crate::syntax::token_stream::{LexerError, Location}; +use crate::util::istring::InternedString; + +grammar; + +extern { +    type Location = Location; +    type Error = LexerError; + +    enum Token { +        "=" => Token::Equals, +        ";" => Token::Semi, + +        "+" => Token::Operator('+'), +        "-" => Token::Operator('-'), +        "*" => Token::Operator('*'), +        "/" => Token::Operator('/'), + +        "<num>" => Token::Number((<Option<u32>>,<i128>)), +        "<var>" => Token::Variable(<InternedString>), +    } +} + +pub Program: Program = { +    <stmts:Statements> <result:Expression> => Program { +        statements: stmts, +        result +    } +} + +Statements: Vec<Statement> = { +    <mut stmts:Statements> <stmt:Statement> => { +        stmts.push(stmt); +        stmts +    }, +    => { +        Vec::new() +    } +} + +Statement: Statement = { +    <l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(l, v.to_string(), e), +    <l:@L> <e:Expression> ";" => Statement::Expr(l, e), +} + +Expression: Expression = { +    MultiplicativeExpression +} + +MultiplicativeExpression: Expression = { +    <l:@L> <e1:MultiplicativeExpression> "*" <e2:AdditiveExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]), +    <l:@L> <e1:MultiplicativeExpression> "/" <e2:AdditiveExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), +    AdditiveExpression, +} + +AdditiveExpression: Expression = { +    <l:@L> <e1:AdditiveExpression> "+" <e2:AtomicExpression> => Expression::Primitive(l,
"*".to_string(), vec![e1, e2]), + "-" => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), + AtomicExpression, +} + +AtomicExpression: Expression = { + "> => Expression::Reference(l, v.to_string()), + "> => { + let val = Value::Number(n.0, n.1); + Expression::Value(l, val) + } +} \ No newline at end of file diff --git a/src/syntax/token_stream.rs b/src/syntax/token_stream.rs new file mode 100644 index 0000000..c98df2f --- /dev/null +++ b/src/syntax/token_stream.rs @@ -0,0 +1,99 @@ +use crate::syntax::tokens::Token; +use crate::util::istring::InternedString; +use logos::{Logos,SpannedIter}; +use std::fs::File; +use std::io; +use std::io::Read; + +pub struct TokenStream<'s> { + filename: InternedString, + lexer: SpannedIter<'s, Token>, +} + +impl<'s> TokenStream<'s> { + pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> { + TokenStream { + filename: InternedString::new(filename), + lexer: Token::lexer(s).spanned() + } + } + + pub fn from_file(filename: &str, buffer: &'s mut String) -> io::Result> { + let mut file = File::open(filename)?; + file.read_to_string(buffer)?; + Ok(TokenStream::new(filename, buffer)) + } +} + +#[derive(Clone,Debug,PartialEq)] +pub enum Location { + InFile(InternedString, usize), + Manufactured +} + +impl Location { + fn new(filename: InternedString, offset: usize) -> Location { + Location::InFile(filename, offset) + } +} + +impl Default for Location { + fn default() -> Self { + Location::Manufactured + } +} + +#[derive(Debug,PartialEq)] +pub struct LexerError { + filename: InternedString, + offset: usize +} + +#[cfg(test)] +impl LexerError { + fn new(filename: InternedString, offset: usize) -> LexerError { + LexerError{ filename, offset, } + } +} + +type LocatedToken = Result<(Location, Token, Location),LexerError>; + +impl<'s> Iterator for TokenStream<'s> { + type Item = LocatedToken; + + fn next(&mut self) -> Option { + match self.lexer.next() { + None => None, + Some((Token::Error, span)) => { + Some(Err(LexerError { + 
filename: self.filename, +                    offset: span.start, +                })) +            } +            Some((token, span)) => { +                let start = Location::new(self.filename, span.start); +                let end = Location::new(self.filename, span.end); +                Some(Ok((start, token, end))) +            } +        } +    } +} + +#[test] +fn stream_works() { +    let fname = InternedString::new(""); +    let mut lex0 = TokenStream::new("", "y = x + 1//foo"); +    assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 0), Token::var("y"), Location::new(fname, 1))))); +    assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 2), Token::Equals, Location::new(fname, 3))))); +    assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 4), Token::var("x"), Location::new(fname, 5))))); +    assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 6), Token::Operator('+'), Location::new(fname, 7))))); +    assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 8), Token::Number((None, 1)), Location::new(fname, 9))))); +    assert_eq!(lex0.next(), None); +} + +#[test] +fn errors_work() { +    let fname = InternedString::new(""); +    let mut lex0 = TokenStream::new("", "\u{2639}"); +    assert_eq!(lex0.next(), Some(Err(LexerError::new(fname, 0)))); +} \ No newline at end of file diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 2e2711d..5466b59 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -1,14 +1,18 @@ +use crate::util::istring::InternedString; use logos::{Lexer, Logos}; use std::num::ParseIntError; -#[derive(Logos,Debug,PartialEq)] -enum Token<'src> { +#[derive(Logos,Clone,Debug,PartialEq)] +pub enum Token { #[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"//.*", logos::skip)] #[token("=")] Equals, + #[token(";")] + Semi, + #[regex(r"[+\-*/]", |v| v.slice().chars().nth(0))] Operator(char), @@ -19,14 +23,21 @@ enum Token<'src> { #[regex(r"[0-9]+", |v| parse_number(None, v))] Number((Option<u32>, i128)), -    #[regex(r"[a-z][a-zA-Z0-9_]*")] -    Variable(&'src str), +    #[regex(r"[a-z][a-zA-Z0-9_]*", |v| InternedString::new(v.slice()))] +    Variable(InternedString), #[error] Error, }
-fn parse_number<'a,'src>(base: Option<u32>, value: &'a Lexer<'src, Token<'src>>) -> Result<(Option<u32>, i128), ParseIntError> { +#[cfg(test)] +impl Token { +    pub(crate) fn var(s: &str) -> Token { +        Token::Variable(InternedString::new(s)) +    } +} + +fn parse_number<'a,'src>(base: Option<u32>, value: &'a Lexer<'src, Token>) -> Result<(Option<u32>, i128), ParseIntError> { let (radix, strval) = match base { None => (10, value.slice()), Some(radix) => (radix, &value.slice()[2..]), @@ -51,20 +62,20 @@ fn lex_numbers() { #[test] fn lex_symbols() { let mut lex0 = Token::lexer("x + \t y * \n z // rest"); -    assert_eq!(lex0.next(), Some(Token::Variable("x"))); +    assert_eq!(lex0.next(), Some(Token::var("x"))); assert_eq!(lex0.next(), Some(Token::Operator('+'))); -    assert_eq!(lex0.next(), Some(Token::Variable("y"))); +    assert_eq!(lex0.next(), Some(Token::var("y"))); assert_eq!(lex0.next(), Some(Token::Operator('*'))); -    assert_eq!(lex0.next(), Some(Token::Variable("z"))); +    assert_eq!(lex0.next(), Some(Token::var("z"))); assert_eq!(lex0.next(), None); } #[test] fn lexer_spans() { let mut lex0 = Token::lexer("y = x + 1//foo").spanned(); -    assert_eq!(lex0.next(), Some((Token::Variable("y"), 0..1))); +    assert_eq!(lex0.next(), Some((Token::var("y"), 0..1))); assert_eq!(lex0.next(), Some((Token::Equals, 2..3))); -    assert_eq!(lex0.next(), Some((Token::Variable("x"), 4..5))); +    assert_eq!(lex0.next(), Some((Token::var("x"), 4..5))); assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7))); assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9))); assert_eq!(lex0.next(), None); diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..3cf37a4 --- /dev/null +++ b/src/util.rs @@ -0,0 +1 @@ +pub mod istring; \ No newline at end of file diff --git a/src/util/istring.rs b/src/util/istring.rs new file mode 100644 index 0000000..afd1194 --- /dev/null +++ b/src/util/istring.rs @@ -0,0 +1,68 @@ +use lazy_static::lazy_static; +use std::cmp::{Ordering, max}; +use
std::collections::HashMap; +use std::fmt; +use std::sync::RwLock; + +lazy_static! { +    static ref STRING_TABLE: RwLock<HashMap<u64, String>> = RwLock::new(HashMap::new()); +} + +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct InternedString { +    index: u64, +} + +impl InternedString { +    /// Return the `InternedString` equivalent of the provided string. This function is slow, and +    /// should be used somewhat sparingly. +    pub fn new(s: &str) -> Self { +        let mut biggest_index = 0; +        let mut table = STRING_TABLE.write().unwrap(); + +        for (k, v) in table.iter() { +            if v == s { +                return InternedString{ index: *k } +            } +            biggest_index = max(biggest_index, *k); +        } + +        let res = biggest_index + 1; +        table.insert(res, s.to_string()); +        InternedString { +            index: res +        } +    } +} + +impl fmt::Debug for InternedString { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        match STRING_TABLE.read().unwrap().get(&self.index) { +            None => write!(f, ""), +            Some(x) => write!(f, "{:?}", x), +        } +    } +} + +impl fmt::Display for InternedString { +    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +        match STRING_TABLE.read().unwrap().get(&self.index) { +            None => write!(f, ""), +            Some(x) => write!(f, "{}", x), +        } +    } +} + +impl PartialOrd for InternedString { +    fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> { +        let table = STRING_TABLE.read().unwrap(); + +        if let Some(me) = table.get(&self.index) { +            if let Some(them) = table.get(&other.index) { +                return me.partial_cmp(them); +            } +        } + +        None +    } +}