From 91d5d1b4fddc67e3807938db5844335116afba28 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Thu, 13 Aug 2020 10:15:32 -0700 Subject: [PATCH] Some basic parsing works (fixing order of operations), and one test case. --- src/syntax.rs | 61 ++++++++++++++++++++++++++++--- src/syntax/ast.rs | 10 ++++-- src/syntax/parser.lalrpop | 16 ++++----- src/syntax/token_stream.rs | 73 +++++++++++++++++++++++++++----------- src/syntax/tokens.rs | 13 +++---- src/util.rs | 2 +- src/util/istring.rs | 12 +++---- 7 files changed, 137 insertions(+), 50 deletions(-) diff --git a/src/syntax.rs b/src/syntax.rs index 9264712..d85d5a6 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,21 +1,25 @@ use lalrpop_util::lalrpop_mod; -mod tokens; mod token_stream; +mod tokens; lalrpop_mod!(parser, "/syntax/parser.rs"); mod ast; pub use crate::syntax::ast::*; use crate::syntax::parser::ProgramParser; -use crate::syntax::tokens::Token; use crate::syntax::token_stream::{LexerError, Location, TokenStream}; +use crate::syntax::tokens::Token; +#[cfg(test)] +use crate::util::istring::InternedString; use lalrpop_util::ParseError; use std::fs; use std::io; +use std::str::FromStr; +#[derive(Debug)] pub enum ParserError { IOError(io::Error), - ParseError(ParseError<Location, Token, LexerError>), + ParseError(ParseError<Location, Token, LexerError>), } impl From<io::Error> for ParserError { @@ -24,7 +28,7 @@ impl From<io::Error> for ParserError { } } -impl From<ParseError<Location, Token, LexerError>> for ParserError { +impl From<ParseError<Location, Token, LexerError>> for ParserError { fn from(x: ParseError<Location, Token, LexerError>) -> Self { ParserError::ParseError(x) } } @@ -37,4 +41,51 @@ impl Program { let lexer = TokenStream::from_file(filename, &mut buffer)?; Ok(ProgramParser::new().parse(lexer)?) } -} \ No newline at end of file + + fn parse(filename: &str, buffer: &mut String) -> Result<Program, ParserError> { let lexer = TokenStream::new(filename, buffer); Ok(ProgramParser::new().parse(lexer)?) 
+ } +} + +impl FromStr for Program { + type Err = ParserError; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let mut s2 = s.to_string(); + Program::parse("", &mut s2) + } +} + +#[test] +fn order_of_operations() { + let muladd1 = "1 + 2 * 3"; + let testfile = InternedString::new(""); + assert_eq!( + Program::from_str(muladd1).unwrap(), + Program { + statements: vec![], + result: Expression::Primitive( + Location::InFile(testfile, 2), + "+".to_string(), + vec![ + Expression::Value(Location::InFile(testfile, 0), Value::Number(None, 1)), + Expression::Primitive( + Location::InFile(testfile, 6), + "*".to_string(), + vec![ + Expression::Value( + Location::InFile(testfile, 4), + Value::Number(None, 2), + ), + Expression::Value( + Location::InFile(testfile, 8), + Value::Number(None, 3), + ), + ] + ) + ] + ) + } + ); +} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index c9c7e7a..09f5bd5 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,21 +1,25 @@ use crate::syntax::token_stream::Location; +#[derive(Debug, PartialEq)] pub struct Program { - pub statements: Vec<Statement>, - pub result: Expression, + pub statements: Vec<Statement>, + pub result: Expression, } +#[derive(Debug, PartialEq)] pub enum Statement { Binding(Location, String, Expression), Expr(Location, Expression), } +#[derive(Debug, PartialEq)] pub enum Expression { Value(Location, Value), Reference(Location, String), Primitive(Location, String, Vec<Expression>), } +#[derive(Debug, PartialEq)] pub enum Value { - Number(Option<u32>, i128) + Number(Option<u32>, i128), } diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 9e67714..0c03494 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -46,18 +46,18 @@ Statement: Statement = { } Expression: Expression = { - MultiplicativeExpression -} - -MultiplicativeExpression: Expression = { - "*" => Expression::Primitive(l, "*".to_string(), vec![e1, e2]), - "/" => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), AdditiveExpression, } AdditiveExpression: 
Expression = { - "+" => Expression::Primitive(l, "*".to_string(), vec![e1, e2]), - "-" => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), + "+" => Expression::Primitive(l, "+".to_string(), vec![e1, e2]), + "-" => Expression::Primitive(l, "-".to_string(), vec![e1, e2]), + MultiplicativeExpression, +} + +MultiplicativeExpression: Expression = { + "*" => Expression::Primitive(l, "*".to_string(), vec![e1, e2]), + "/" => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), AtomicExpression, } diff --git a/src/syntax/token_stream.rs b/src/syntax/token_stream.rs index c98df2f..a59e8a9 100644 --- a/src/syntax/token_stream.rs +++ b/src/syntax/token_stream.rs @@ -1,6 +1,6 @@ use crate::syntax::tokens::Token; use crate::util::istring::InternedString; -use logos::{Logos,SpannedIter}; +use logos::{Logos, SpannedIter}; use std::fs::File; use std::io; use std::io::Read; @@ -14,7 +14,7 @@ impl<'s> TokenStream<'s> { pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> { TokenStream { filename: InternedString::new(filename), - lexer: Token::lexer(s).spanned() + lexer: Token::lexer(s).spanned(), } } @@ -25,10 +25,10 @@ impl<'s> TokenStream<'s> { } } -#[derive(Clone,Debug,PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum Location { InFile(InternedString, usize), - Manufactured + Manufactured, } impl Location { @@ -43,20 +43,20 @@ impl Default for Location { } } -#[derive(Debug,PartialEq)] +#[derive(Debug, PartialEq)] pub struct LexerError { filename: InternedString, - offset: usize + offset: usize, } #[cfg(test)] impl LexerError { fn new(filename: InternedString, offset: usize) -> LexerError { - LexerError{ filename, offset, } + LexerError { filename, offset } } } -type LocatedToken = Result<(Location, Token, Location),LexerError>; +type LocatedToken = Result<(Location, Token, Location), LexerError>; impl<'s> Iterator for TokenStream<'s> { type Item = LocatedToken; fn next(&mut self) -> Option<Self::Item> { match 
self.lexer.next() { None => None, - Some((Token::Error, span)) => { - Some(Err(LexerError { - filename: self.filename, - offset: span.start, - })) - } + Some((Token::Error, span)) => Some(Err(LexerError { + filename: self.filename, + offset: span.start, + })), Some((token, span)) => { let start = Location::new(self.filename, span.start); let end = Location::new(self.filename, span.end); @@ -83,11 +81,46 @@ impl<'s> Iterator for TokenStream<'s> { fn stream_works() { let fname = InternedString::new(""); let mut lex0 = TokenStream::new("", "y = x + 1//foo"); - assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 0), Token::var("y"), Location::new(fname, 1))))); - assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 2), Token::Equals, Location::new(fname, 3))))); - assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 4), Token::var("x"), Location::new(fname, 5))))); - assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 6), Token::Operator('+'), Location::new(fname, 7))))); - assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 8), Token::Number((None, 1)), Location::new(fname, 9))))); + assert_eq!( + lex0.next(), + Some(Ok(( + Location::new(fname, 0), + Token::var("y"), + Location::new(fname, 1) + ))) + ); + assert_eq!( + lex0.next(), + Some(Ok(( + Location::new(fname, 2), + Token::Equals, + Location::new(fname, 3) + ))) + ); + assert_eq!( + lex0.next(), + Some(Ok(( + Location::new(fname, 4), + Token::var("x"), + Location::new(fname, 5) + ))) + ); + assert_eq!( + lex0.next(), + Some(Ok(( + Location::new(fname, 6), + Token::Operator('+'), + Location::new(fname, 7) + ))) + ); + assert_eq!( + lex0.next(), + Some(Ok(( + Location::new(fname, 8), + Token::Number((None, 1)), + Location::new(fname, 9) + ))) + ); assert_eq!(lex0.next(), None); } @@ -96,4 +129,4 @@ fn errors_work() { let fname = InternedString::new(""); let mut lex0 = TokenStream::new("", "\u{2639}"); assert_eq!(lex0.next(), Some(Err(LexerError::new(fname, 0)))); -} \ No newline at end of file +} diff 
--git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 5466b59..eec33ef 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -2,11 +2,10 @@ use crate::util::istring::InternedString; use logos::{Lexer, Logos}; use std::num::ParseIntError; -#[derive(Logos,Clone,Debug,PartialEq)] +#[derive(Logos, Clone, Debug, PartialEq)] pub enum Token { #[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"//.*", logos::skip)] - #[token("=")] Equals, @@ -37,10 +36,13 @@ impl Token { } } -fn parse_number<'a,'src>(base: Option<u32>, value: &'a Lexer<'src, Token>) -> Result<(Option<u32>, i128), ParseIntError> { +fn parse_number<'a, 'src>( + base: Option<u32>, + value: &'a Lexer<'src, Token>, +) -> Result<(Option<u32>, i128), ParseIntError> { let (radix, strval) = match base { - None => (10, value.slice()), - Some(radix) => (radix, &value.slice()[2..]), + None => (10, value.slice()), + Some(radix) => (radix, &value.slice()[2..]), }; println!("HERE! (radix {}, slice |{}|", radix, strval); @@ -80,4 +82,3 @@ fn lexer_spans() { assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9))); assert_eq!(lex0.next(), None); } - diff --git a/src/util.rs b/src/util.rs index 3cf37a4..f324427 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1 +1 @@ -pub mod istring; \ No newline at end of file +pub mod istring; diff --git a/src/util/istring.rs b/src/util/istring.rs index afd1194..e730f87 100644 --- a/src/util/istring.rs +++ b/src/util/istring.rs @@ -1,5 +1,5 @@ use lazy_static::lazy_static; -use std::cmp::{Ordering, max}; +use std::cmp::{max, Ordering}; use std::collections::HashMap; use std::fmt; use std::sync::RwLock; @@ -18,20 +18,18 @@ impl InternedString { /// should be used somewhat sparingly. 
pub fn new(s: &str) -> Self { let mut biggest_index = 0; - let mut table = STRING_TABLE.write().unwrap(); + let mut table = STRING_TABLE.write().unwrap(); for (k, v) in table.iter() { if v == s { - return InternedString{ index: *k } + return InternedString { index: *k }; } biggest_index = max(biggest_index, *k); } let res = biggest_index + 1; table.insert(res, s.to_string()); - InternedString { - index: res - } + InternedString { index: res } } } @@ -65,4 +63,4 @@ impl PartialOrd for InternedString { None } -} \ No newline at end of file +}