Basic parsing now works (order of operations is fixed); adds one test case.

This commit is contained in:
2020-08-13 10:15:32 -07:00
parent 2881c5104a
commit 91d5d1b4fd
7 changed files with 137 additions and 50 deletions

View File

@@ -1,18 +1,22 @@
use lalrpop_util::lalrpop_mod; use lalrpop_util::lalrpop_mod;
mod tokens;
mod token_stream; mod token_stream;
mod tokens;
lalrpop_mod!(parser, "/syntax/parser.rs"); lalrpop_mod!(parser, "/syntax/parser.rs");
mod ast; mod ast;
pub use crate::syntax::ast::*; pub use crate::syntax::ast::*;
use crate::syntax::parser::ProgramParser; use crate::syntax::parser::ProgramParser;
use crate::syntax::tokens::Token;
use crate::syntax::token_stream::{LexerError, Location, TokenStream}; use crate::syntax::token_stream::{LexerError, Location, TokenStream};
use crate::syntax::tokens::Token;
#[cfg(test)]
use crate::util::istring::InternedString;
use lalrpop_util::ParseError; use lalrpop_util::ParseError;
use std::fs; use std::fs;
use std::io; use std::io;
use std::str::FromStr;
#[derive(Debug)]
pub enum ParserError { pub enum ParserError {
IOError(io::Error), IOError(io::Error),
ParseError(ParseError<Location, Token, LexerError>), ParseError(ParseError<Location, Token, LexerError>),
@@ -37,4 +41,51 @@ impl Program {
let lexer = TokenStream::from_file(filename, &mut buffer)?; let lexer = TokenStream::from_file(filename, &mut buffer)?;
Ok(ProgramParser::new().parse(lexer)?) Ok(ProgramParser::new().parse(lexer)?)
} }
/// Parses the source text in `buffer` into a `Program`.
///
/// `filename` is passed to the token stream and `buffer` is the text that gets
/// lexed. The text is only read, so it is accepted as `&str`; call sites that
/// hold a `&mut String` keep compiling through deref coercion.
///
/// # Errors
/// Any failure reported by `ProgramParser::parse` is converted into a
/// `ParserError` by `?` and returned.
fn parse(filename: &str, buffer: &str) -> Result<Program, ParserError> {
    let lexer = TokenStream::new(filename, buffer);
    Ok(ProgramParser::new().parse(lexer)?)
}
}
/// Builds a `Program` from in-memory source text (`Program::from_str(..)` or `text.parse()`).
impl FromStr for Program {
    type Err = ParserError;

    /// Parses `s` as a program, using `<from_str>` as the file name handed to the parser.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Give the parser its own owned copy of the input text.
        let mut source = String::from(s);
        Program::parse("<from_str>", &mut source)
    }
}
/// Checks operator precedence: `1 + 2 * 3` must parse as `1 + (2 * 3)`, i.e. the
/// `*` node is nested as the right-hand operand of the `+` node.
///
/// The number in each `Location::InFile` is the offset of that token within the
/// input string: `1` at 0, `+` at 2, `2` at 4, `*` at 6, `3` at 8.
#[test]
fn order_of_operations() {
let muladd1 = "1 + 2 * 3";
// Must match the file name `from_str` hands to the parser so locations compare equal.
let testfile = InternedString::new("<from_str>");
assert_eq!(
Program::from_str(muladd1).unwrap(),
Program {
// The input is a single expression, so no statements are expected.
statements: vec![],
result: Expression::Primitive(
// Operator nodes carry the location of the operator token itself.
Location::InFile(testfile, 2),
"+".to_string(),
vec![
Expression::Value(Location::InFile(testfile, 0), Value::Number(None, 1)),
// Right operand of `+` is the whole product `2 * 3`.
Expression::Primitive(
Location::InFile(testfile, 6),
"*".to_string(),
vec![
Expression::Value(
Location::InFile(testfile, 4),
Value::Number(None, 2),
),
Expression::Value(
Location::InFile(testfile, 8),
Value::Number(None, 3),
),
]
)
]
)
}
);
} }

View File

@@ -1,21 +1,25 @@
use crate::syntax::token_stream::Location; use crate::syntax::token_stream::Location;
#[derive(Debug, PartialEq)]
pub struct Program { pub struct Program {
pub statements: Vec<Statement>, pub statements: Vec<Statement>,
pub result: Expression, pub result: Expression,
} }
#[derive(Debug, PartialEq)]
pub enum Statement { pub enum Statement {
Binding(Location, String, Expression), Binding(Location, String, Expression),
Expr(Location, Expression), Expr(Location, Expression),
} }
#[derive(Debug, PartialEq)]
pub enum Expression { pub enum Expression {
Value(Location, Value), Value(Location, Value),
Reference(Location, String), Reference(Location, String),
Primitive(Location, String, Vec<Expression>), Primitive(Location, String, Vec<Expression>),
} }
#[derive(Debug, PartialEq)]
pub enum Value { pub enum Value {
Number(Option<u8>, i128) Number(Option<u8>, i128),
} }

View File

@@ -46,18 +46,18 @@ Statement: Statement = {
} }
Expression: Expression = { Expression: Expression = {
MultiplicativeExpression
}
MultiplicativeExpression: Expression = {
<l:@L> <e1:MultiplicativeExpression> "*" <e2:AdditiveExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
<l:@L> <e1:MultiplicativeExpression> "/" <e2:AdditiveExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
AdditiveExpression, AdditiveExpression,
} }
AdditiveExpression: Expression = { AdditiveExpression: Expression = {
<l:@L> <e1:AdditiveExpression> "+" <e2:AtomicExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]), <e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(l, "+".to_string(), vec![e1, e2]),
<l:@L> <e1:AdditiveExpression> "-" <e2:AtomicExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), <e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(l, "-".to_string(), vec![e1, e2]),
MultiplicativeExpression,
}
MultiplicativeExpression: Expression = {
<e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
<e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
AtomicExpression, AtomicExpression,
} }

View File

@@ -14,7 +14,7 @@ impl<'s> TokenStream<'s> {
pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> { pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> {
TokenStream { TokenStream {
filename: InternedString::new(filename), filename: InternedString::new(filename),
lexer: Token::lexer(s).spanned() lexer: Token::lexer(s).spanned(),
} }
} }
@@ -28,7 +28,7 @@ impl<'s> TokenStream<'s> {
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Location { pub enum Location {
InFile(InternedString, usize), InFile(InternedString, usize),
Manufactured Manufactured,
} }
impl Location { impl Location {
@@ -46,13 +46,13 @@ impl Default for Location {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub struct LexerError { pub struct LexerError {
filename: InternedString, filename: InternedString,
offset: usize offset: usize,
} }
#[cfg(test)] #[cfg(test)]
impl LexerError { impl LexerError {
fn new(filename: InternedString, offset: usize) -> LexerError { fn new(filename: InternedString, offset: usize) -> LexerError {
LexerError{ filename, offset, } LexerError { filename, offset }
} }
} }
@@ -64,12 +64,10 @@ impl<'s> Iterator for TokenStream<'s> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.lexer.next() { match self.lexer.next() {
None => None, None => None,
Some((Token::Error, span)) => { Some((Token::Error, span)) => Some(Err(LexerError {
Some(Err(LexerError {
filename: self.filename, filename: self.filename,
offset: span.start, offset: span.start,
})) })),
}
Some((token, span)) => { Some((token, span)) => {
let start = Location::new(self.filename, span.start); let start = Location::new(self.filename, span.start);
let end = Location::new(self.filename, span.end); let end = Location::new(self.filename, span.end);
@@ -83,11 +81,46 @@ impl<'s> Iterator for TokenStream<'s> {
fn stream_works() { fn stream_works() {
let fname = InternedString::new("<file>"); let fname = InternedString::new("<file>");
let mut lex0 = TokenStream::new("<file>", "y = x + 1//foo"); let mut lex0 = TokenStream::new("<file>", "y = x + 1//foo");
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 0), Token::var("y"), Location::new(fname, 1))))); assert_eq!(
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 2), Token::Equals, Location::new(fname, 3))))); lex0.next(),
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 4), Token::var("x"), Location::new(fname, 5))))); Some(Ok((
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 6), Token::Operator('+'), Location::new(fname, 7))))); Location::new(fname, 0),
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 8), Token::Number((None, 1)), Location::new(fname, 9))))); Token::var("y"),
Location::new(fname, 1)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 2),
Token::Equals,
Location::new(fname, 3)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 4),
Token::var("x"),
Location::new(fname, 5)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 6),
Token::Operator('+'),
Location::new(fname, 7)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 8),
Token::Number((None, 1)),
Location::new(fname, 9)
)))
);
assert_eq!(lex0.next(), None); assert_eq!(lex0.next(), None);
} }

View File

@@ -6,7 +6,6 @@ use std::num::ParseIntError;
pub enum Token { pub enum Token {
#[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"[ \t\n\f]+", logos::skip)]
#[regex(r"//.*", logos::skip)] #[regex(r"//.*", logos::skip)]
#[token("=")] #[token("=")]
Equals, Equals,
@@ -37,7 +36,10 @@ impl Token {
} }
} }
fn parse_number<'a,'src>(base: Option<u8>, value: &'a Lexer<'src, Token>) -> Result<(Option<u8>, i128), ParseIntError> { fn parse_number<'a, 'src>(
base: Option<u8>,
value: &'a Lexer<'src, Token>,
) -> Result<(Option<u8>, i128), ParseIntError> {
let (radix, strval) = match base { let (radix, strval) = match base {
None => (10, value.slice()), None => (10, value.slice()),
Some(radix) => (radix, &value.slice()[2..]), Some(radix) => (radix, &value.slice()[2..]),
@@ -80,4 +82,3 @@ fn lexer_spans() {
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9))); assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
assert_eq!(lex0.next(), None); assert_eq!(lex0.next(), None);
} }

View File

@@ -1,5 +1,5 @@
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::cmp::{Ordering, max}; use std::cmp::{max, Ordering};
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt; use std::fmt;
use std::sync::RwLock; use std::sync::RwLock;
@@ -22,16 +22,14 @@ impl InternedString {
for (k, v) in table.iter() { for (k, v) in table.iter() {
if v == s { if v == s {
return InternedString{ index: *k } return InternedString { index: *k };
} }
biggest_index = max(biggest_index, *k); biggest_index = max(biggest_index, *k);
} }
let res = biggest_index + 1; let res = biggest_index + 1;
table.insert(res, s.to_string()); table.insert(res, s.to_string());
InternedString { InternedString { index: res }
index: res
}
} }
} }