Some basic parsing works (fixing order of operations), and one test case.

This commit is contained in:
2020-08-13 10:15:32 -07:00
parent 2881c5104a
commit 91d5d1b4fd
7 changed files with 137 additions and 50 deletions

View File

@@ -1,21 +1,25 @@
use lalrpop_util::lalrpop_mod; use lalrpop_util::lalrpop_mod;
mod tokens;
mod token_stream; mod token_stream;
mod tokens;
lalrpop_mod!(parser, "/syntax/parser.rs"); lalrpop_mod!(parser, "/syntax/parser.rs");
mod ast; mod ast;
pub use crate::syntax::ast::*; pub use crate::syntax::ast::*;
use crate::syntax::parser::ProgramParser; use crate::syntax::parser::ProgramParser;
use crate::syntax::tokens::Token;
use crate::syntax::token_stream::{LexerError, Location, TokenStream}; use crate::syntax::token_stream::{LexerError, Location, TokenStream};
use crate::syntax::tokens::Token;
#[cfg(test)]
use crate::util::istring::InternedString;
use lalrpop_util::ParseError; use lalrpop_util::ParseError;
use std::fs; use std::fs;
use std::io; use std::io;
use std::str::FromStr;
#[derive(Debug)]
pub enum ParserError { pub enum ParserError {
IOError(io::Error), IOError(io::Error),
ParseError(ParseError<Location,Token,LexerError>), ParseError(ParseError<Location, Token, LexerError>),
} }
impl From<io::Error> for ParserError { impl From<io::Error> for ParserError {
@@ -24,7 +28,7 @@ impl From<io::Error> for ParserError {
} }
} }
impl From<ParseError<Location,Token,LexerError>> for ParserError { impl From<ParseError<Location, Token, LexerError>> for ParserError {
fn from(x: ParseError<Location, Token, LexerError>) -> Self { fn from(x: ParseError<Location, Token, LexerError>) -> Self {
ParserError::ParseError(x) ParserError::ParseError(x)
} }
@@ -37,4 +41,51 @@ impl Program {
let lexer = TokenStream::from_file(filename, &mut buffer)?; let lexer = TokenStream::from_file(filename, &mut buffer)?;
Ok(ProgramParser::new().parse(lexer)?) Ok(ProgramParser::new().parse(lexer)?)
} }
}
/// Parses program text that is already held in memory.
///
/// `filename` is handed to `TokenStream::new`, which uses it to label the
/// locations attached to tokens; nothing is read from disk here.
///
/// The buffer is taken as `&str` because the lexer only ever reads from it.
/// Callers that still pass `&mut String` keep compiling through deref
/// coercion.
///
/// # Errors
///
/// Any error reported by the generated parser is converted into a
/// `ParserError` through its `From` implementation.
fn parse(filename: &str, buffer: &str) -> Result<Program, ParserError> {
    let lexer = TokenStream::new(filename, buffer);
    Ok(ProgramParser::new().parse(lexer)?)
}
}
impl FromStr for Program {
type Err = ParserError;
fn from_str(s: &str) -> Result<Program, ParserError> {
let mut s2 = s.to_string();
Program::parse("<from_str>", &mut s2)
}
}
/// Checks that `*` binds tighter than `+`: `1 + 2 * 3` must parse as
/// `1 + (2 * 3)`.
#[test]
fn order_of_operations() {
    let source = "1 + 2 * 3";
    let file = InternedString::new("<from_str>");

    // Expected offsets within `source`: `1`@0, `+`@2, `2`@4, `*`@6, `3`@8.
    let number =
        |offset, n| Expression::Value(Location::InFile(file, offset), Value::Number(None, n));

    // The multiplication must end up as the right-hand operand of the addition.
    let product = Expression::Primitive(
        Location::InFile(file, 6),
        "*".to_string(),
        vec![number(4, 2), number(8, 3)],
    );
    let expected = Program {
        statements: vec![],
        result: Expression::Primitive(
            Location::InFile(file, 2),
            "+".to_string(),
            vec![number(0, 1), product],
        ),
    };

    assert_eq!(Program::from_str(source).unwrap(), expected);
}

View File

@@ -1,21 +1,25 @@
use crate::syntax::token_stream::Location; use crate::syntax::token_stream::Location;
#[derive(Debug, PartialEq)]
pub struct Program { pub struct Program {
pub statements: Vec<Statement>, pub statements: Vec<Statement>,
pub result: Expression, pub result: Expression,
} }
#[derive(Debug, PartialEq)]
pub enum Statement { pub enum Statement {
Binding(Location, String, Expression), Binding(Location, String, Expression),
Expr(Location, Expression), Expr(Location, Expression),
} }
#[derive(Debug, PartialEq)]
pub enum Expression { pub enum Expression {
Value(Location, Value), Value(Location, Value),
Reference(Location, String), Reference(Location, String),
Primitive(Location, String, Vec<Expression>), Primitive(Location, String, Vec<Expression>),
} }
#[derive(Debug, PartialEq)]
pub enum Value { pub enum Value {
Number(Option<u8>, i128) Number(Option<u8>, i128),
} }

View File

@@ -46,18 +46,18 @@ Statement: Statement = {
} }
Expression: Expression = { Expression: Expression = {
MultiplicativeExpression
}
MultiplicativeExpression: Expression = {
<l:@L> <e1:MultiplicativeExpression> "*" <e2:AdditiveExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
<l:@L> <e1:MultiplicativeExpression> "/" <e2:AdditiveExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
AdditiveExpression, AdditiveExpression,
} }
AdditiveExpression: Expression = { AdditiveExpression: Expression = {
<l:@L> <e1:AdditiveExpression> "+" <e2:AtomicExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]), <e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(l, "+".to_string(), vec![e1, e2]),
<l:@L> <e1:AdditiveExpression> "-" <e2:AtomicExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]), <e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(l, "-".to_string(), vec![e1, e2]),
MultiplicativeExpression,
}
MultiplicativeExpression: Expression = {
<e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
<e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
AtomicExpression, AtomicExpression,
} }

View File

@@ -1,6 +1,6 @@
use crate::syntax::tokens::Token; use crate::syntax::tokens::Token;
use crate::util::istring::InternedString; use crate::util::istring::InternedString;
use logos::{Logos,SpannedIter}; use logos::{Logos, SpannedIter};
use std::fs::File; use std::fs::File;
use std::io; use std::io;
use std::io::Read; use std::io::Read;
@@ -14,7 +14,7 @@ impl<'s> TokenStream<'s> {
pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> { pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> {
TokenStream { TokenStream {
filename: InternedString::new(filename), filename: InternedString::new(filename),
lexer: Token::lexer(s).spanned() lexer: Token::lexer(s).spanned(),
} }
} }
@@ -25,10 +25,10 @@ impl<'s> TokenStream<'s> {
} }
} }
#[derive(Clone,Debug,PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Location { pub enum Location {
InFile(InternedString, usize), InFile(InternedString, usize),
Manufactured Manufactured,
} }
impl Location { impl Location {
@@ -43,20 +43,20 @@ impl Default for Location {
} }
} }
#[derive(Debug,PartialEq)] #[derive(Debug, PartialEq)]
pub struct LexerError { pub struct LexerError {
filename: InternedString, filename: InternedString,
offset: usize offset: usize,
} }
#[cfg(test)] #[cfg(test)]
impl LexerError { impl LexerError {
fn new(filename: InternedString, offset: usize) -> LexerError { fn new(filename: InternedString, offset: usize) -> LexerError {
LexerError{ filename, offset, } LexerError { filename, offset }
} }
} }
type LocatedToken = Result<(Location, Token, Location),LexerError>; type LocatedToken = Result<(Location, Token, Location), LexerError>;
impl<'s> Iterator for TokenStream<'s> { impl<'s> Iterator for TokenStream<'s> {
type Item = LocatedToken; type Item = LocatedToken;
@@ -64,12 +64,10 @@ impl<'s> Iterator for TokenStream<'s> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.lexer.next() { match self.lexer.next() {
None => None, None => None,
Some((Token::Error, span)) => { Some((Token::Error, span)) => Some(Err(LexerError {
Some(Err(LexerError { filename: self.filename,
filename: self.filename, offset: span.start,
offset: span.start, })),
}))
}
Some((token, span)) => { Some((token, span)) => {
let start = Location::new(self.filename, span.start); let start = Location::new(self.filename, span.start);
let end = Location::new(self.filename, span.end); let end = Location::new(self.filename, span.end);
@@ -83,11 +81,46 @@ impl<'s> Iterator for TokenStream<'s> {
fn stream_works() { fn stream_works() {
let fname = InternedString::new("<file>"); let fname = InternedString::new("<file>");
let mut lex0 = TokenStream::new("<file>", "y = x + 1//foo"); let mut lex0 = TokenStream::new("<file>", "y = x + 1//foo");
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 0), Token::var("y"), Location::new(fname, 1))))); assert_eq!(
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 2), Token::Equals, Location::new(fname, 3))))); lex0.next(),
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 4), Token::var("x"), Location::new(fname, 5))))); Some(Ok((
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 6), Token::Operator('+'), Location::new(fname, 7))))); Location::new(fname, 0),
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 8), Token::Number((None, 1)), Location::new(fname, 9))))); Token::var("y"),
Location::new(fname, 1)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 2),
Token::Equals,
Location::new(fname, 3)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 4),
Token::var("x"),
Location::new(fname, 5)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 6),
Token::Operator('+'),
Location::new(fname, 7)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 8),
Token::Number((None, 1)),
Location::new(fname, 9)
)))
);
assert_eq!(lex0.next(), None); assert_eq!(lex0.next(), None);
} }
@@ -96,4 +129,4 @@ fn errors_work() {
let fname = InternedString::new("<file>"); let fname = InternedString::new("<file>");
let mut lex0 = TokenStream::new("<file>", "\u{2639}"); let mut lex0 = TokenStream::new("<file>", "\u{2639}");
assert_eq!(lex0.next(), Some(Err(LexerError::new(fname, 0)))); assert_eq!(lex0.next(), Some(Err(LexerError::new(fname, 0))));
} }

View File

@@ -2,11 +2,10 @@ use crate::util::istring::InternedString;
use logos::{Lexer, Logos}; use logos::{Lexer, Logos};
use std::num::ParseIntError; use std::num::ParseIntError;
#[derive(Logos,Clone,Debug,PartialEq)] #[derive(Logos, Clone, Debug, PartialEq)]
pub enum Token { pub enum Token {
#[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"[ \t\n\f]+", logos::skip)]
#[regex(r"//.*", logos::skip)] #[regex(r"//.*", logos::skip)]
#[token("=")] #[token("=")]
Equals, Equals,
@@ -37,10 +36,13 @@ impl Token {
} }
} }
fn parse_number<'a,'src>(base: Option<u8>, value: &'a Lexer<'src, Token>) -> Result<(Option<u8>, i128), ParseIntError> { fn parse_number<'a, 'src>(
base: Option<u8>,
value: &'a Lexer<'src, Token>,
) -> Result<(Option<u8>, i128), ParseIntError> {
let (radix, strval) = match base { let (radix, strval) = match base {
None => (10, value.slice()), None => (10, value.slice()),
Some(radix) => (radix, &value.slice()[2..]), Some(radix) => (radix, &value.slice()[2..]),
}; };
println!("HERE! (radix {}, slice |{}|", radix, strval); println!("HERE! (radix {}, slice |{}|", radix, strval);
@@ -80,4 +82,3 @@ fn lexer_spans() {
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9))); assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
assert_eq!(lex0.next(), None); assert_eq!(lex0.next(), None);
} }

View File

@@ -1 +1 @@
pub mod istring; pub mod istring;

View File

@@ -1,5 +1,5 @@
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::cmp::{Ordering, max}; use std::cmp::{max, Ordering};
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt; use std::fmt;
use std::sync::RwLock; use std::sync::RwLock;
@@ -18,20 +18,18 @@ impl InternedString {
/// should be used somewhat sparingly. /// should be used somewhat sparingly.
pub fn new(s: &str) -> Self { pub fn new(s: &str) -> Self {
let mut biggest_index = 0; let mut biggest_index = 0;
let mut table = STRING_TABLE.write().unwrap(); let mut table = STRING_TABLE.write().unwrap();
for (k, v) in table.iter() { for (k, v) in table.iter() {
if v == s { if v == s {
return InternedString{ index: *k } return InternedString { index: *k };
} }
biggest_index = max(biggest_index, *k); biggest_index = max(biggest_index, *k);
} }
let res = biggest_index + 1; let res = biggest_index + 1;
table.insert(res, s.to_string()); table.insert(res, s.to_string());
InternedString { InternedString { index: res }
index: res
}
} }
} }
@@ -65,4 +63,4 @@ impl PartialOrd<InternedString> for InternedString {
None None
} }
} }