Basic parsing now works (operator precedence / order of operations is corrected), with one test case added.

This commit is contained in:
2020-08-13 10:15:32 -07:00
parent 2881c5104a
commit 91d5d1b4fd
7 changed files with 137 additions and 50 deletions

View File

@@ -1,21 +1,25 @@
use lalrpop_util::lalrpop_mod;
mod tokens;
mod token_stream;
mod tokens;
lalrpop_mod!(parser, "/syntax/parser.rs");
mod ast;
pub use crate::syntax::ast::*;
use crate::syntax::parser::ProgramParser;
use crate::syntax::tokens::Token;
use crate::syntax::token_stream::{LexerError, Location, TokenStream};
use crate::syntax::tokens::Token;
#[cfg(test)]
use crate::util::istring::InternedString;
use lalrpop_util::ParseError;
use std::fs;
use std::io;
use std::str::FromStr;
#[derive(Debug)]
pub enum ParserError {
IOError(io::Error),
ParseError(ParseError<Location,Token,LexerError>),
ParseError(ParseError<Location, Token, LexerError>),
}
impl From<io::Error> for ParserError {
@@ -24,7 +28,7 @@ impl From<io::Error> for ParserError {
}
}
impl From<ParseError<Location,Token,LexerError>> for ParserError {
impl From<ParseError<Location, Token, LexerError>> for ParserError {
fn from(x: ParseError<Location, Token, LexerError>) -> Self {
ParserError::ParseError(x)
}
@@ -37,4 +41,51 @@ impl Program {
let lexer = TokenStream::from_file(filename, &mut buffer)?;
Ok(ProgramParser::new().parse(lexer)?)
}
/// Parses the contents of `buffer` into a `Program`, labelling all
/// source locations with `filename`.
///
/// NOTE(review): `buffer` is taken as `&mut String`, but `TokenStream::new`
/// appears to need only `&str` — presumably kept mutable for symmetry with
/// `from_file`'s buffer parameter; confirm whether `&str` would suffice.
fn parse(filename: &str, buffer: &mut String) -> Result<Program, ParserError> {
// Lex the in-memory buffer, then feed the token stream to the
// LALRPOP-generated parser; `?` converts the parse error into
// `ParserError` via the `From` impl.
let lexer = TokenStream::new(filename, buffer);
Ok(ProgramParser::new().parse(lexer)?)
}
}
impl FromStr for Program {
type Err = ParserError;

/// Parses a program directly from an in-memory string; locations are
/// tagged with the pseudo-filename `"<from_str>"`.
fn from_str(s: &str) -> Result<Program, ParserError> {
// `Program::parse` wants a `&mut String`, so make an owned copy
// of the borrowed input first.
let mut owned = s.to_string();
Program::parse("<from_str>", &mut owned)
}
}
// Verifies operator precedence: in "1 + 2 * 3" the multiplication must
// bind tighter, yielding (+ 1 (* 2 3)) rather than (* (+ 1 2) 3).
// The `usize` in each `Location::InFile` is the byte offset of the token
// within the input string ('1'=0, '+'=2, '2'=4, '*'=6, '3'=8).
#[test]
fn order_of_operations() {
let muladd1 = "1 + 2 * 3";
// `FromStr` tags every location with the pseudo-filename "<from_str>".
let testfile = InternedString::new("<from_str>");
assert_eq!(
Program::from_str(muladd1).unwrap(),
Program {
statements: vec![],
// Top-level node is the '+' primitive (offset 2)...
result: Expression::Primitive(
Location::InFile(testfile, 2),
"+".to_string(),
vec![
Expression::Value(Location::InFile(testfile, 0), Value::Number(None, 1)),
// ...whose right operand is the nested '*' (offset 6),
// proving '*' bound tighter than '+'.
Expression::Primitive(
Location::InFile(testfile, 6),
"*".to_string(),
vec![
Expression::Value(
Location::InFile(testfile, 4),
Value::Number(None, 2),
),
Expression::Value(
Location::InFile(testfile, 8),
Value::Number(None, 3),
),
]
)
]
)
}
);
}

View File

@@ -1,21 +1,25 @@
use crate::syntax::token_stream::Location;
/// A parsed source unit: zero or more statements followed by a single
/// result expression.
#[derive(Debug, PartialEq)]
pub struct Program {
// Statements evaluated before the result (bindings and bare expressions).
pub statements: Vec<Statement>,
// The final expression whose value the program produces.
pub result: Expression,
}
/// A single statement within a `Program`, carrying its source `Location`.
#[derive(Debug, PartialEq)]
pub enum Statement {
/// A name bound to an expression (e.g. `x = ...`).
Binding(Location, String, Expression),
/// A bare expression statement.
Expr(Location, Expression),
}
/// An expression tree node; every variant records its source `Location`.
#[derive(Debug, PartialEq)]
pub enum Expression {
/// A literal value.
Value(Location, Value),
/// A reference to a previously bound name.
Reference(Location, String),
/// A primitive operation (e.g. "+", "*") applied to operand expressions.
Primitive(Location, String, Vec<Expression>),
}
#[derive(Debug, PartialEq)]
pub enum Value {
Number(Option<u8>, i128)
Number(Option<u8>, i128),
}

View File

@@ -46,18 +46,18 @@ Statement: Statement = {
}
Expression: Expression = {
MultiplicativeExpression
}
MultiplicativeExpression: Expression = {
<l:@L> <e1:MultiplicativeExpression> "*" <e2:AdditiveExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
<l:@L> <e1:MultiplicativeExpression> "/" <e2:AdditiveExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
AdditiveExpression,
}
AdditiveExpression: Expression = {
<l:@L> <e1:AdditiveExpression> "+" <e2:AtomicExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
<l:@L> <e1:AdditiveExpression> "-" <e2:AtomicExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
<e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(l, "+".to_string(), vec![e1, e2]),
<e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(l, "-".to_string(), vec![e1, e2]),
MultiplicativeExpression,
}
MultiplicativeExpression: Expression = {
<e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
<e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
AtomicExpression,
}

View File

@@ -1,6 +1,6 @@
use crate::syntax::tokens::Token;
use crate::util::istring::InternedString;
use logos::{Logos,SpannedIter};
use logos::{Logos, SpannedIter};
use std::fs::File;
use std::io;
use std::io::Read;
@@ -14,7 +14,7 @@ impl<'s> TokenStream<'s> {
pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> {
TokenStream {
filename: InternedString::new(filename),
lexer: Token::lexer(s).spanned()
lexer: Token::lexer(s).spanned(),
}
}
@@ -25,10 +25,10 @@ impl<'s> TokenStream<'s> {
}
}
#[derive(Clone,Debug,PartialEq)]
#[derive(Clone, Debug, PartialEq)]
pub enum Location {
InFile(InternedString, usize),
Manufactured
Manufactured,
}
impl Location {
@@ -43,20 +43,20 @@ impl Default for Location {
}
}
#[derive(Debug,PartialEq)]
#[derive(Debug, PartialEq)]
pub struct LexerError {
filename: InternedString,
offset: usize
offset: usize,
}
#[cfg(test)]
impl LexerError {
fn new(filename: InternedString, offset: usize) -> LexerError {
LexerError{ filename, offset, }
LexerError { filename, offset }
}
}
type LocatedToken = Result<(Location, Token, Location),LexerError>;
type LocatedToken = Result<(Location, Token, Location), LexerError>;
impl<'s> Iterator for TokenStream<'s> {
type Item = LocatedToken;
@@ -64,12 +64,10 @@ impl<'s> Iterator for TokenStream<'s> {
fn next(&mut self) -> Option<Self::Item> {
match self.lexer.next() {
None => None,
Some((Token::Error, span)) => {
Some(Err(LexerError {
Some((Token::Error, span)) => Some(Err(LexerError {
filename: self.filename,
offset: span.start,
}))
}
})),
Some((token, span)) => {
let start = Location::new(self.filename, span.start);
let end = Location::new(self.filename, span.end);
@@ -83,11 +81,46 @@ impl<'s> Iterator for TokenStream<'s> {
fn stream_works() {
let fname = InternedString::new("<file>");
let mut lex0 = TokenStream::new("<file>", "y = x + 1//foo");
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 0), Token::var("y"), Location::new(fname, 1)))));
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 2), Token::Equals, Location::new(fname, 3)))));
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 4), Token::var("x"), Location::new(fname, 5)))));
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 6), Token::Operator('+'), Location::new(fname, 7)))));
assert_eq!(lex0.next(), Some(Ok((Location::new(fname, 8), Token::Number((None, 1)), Location::new(fname, 9)))));
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 0),
Token::var("y"),
Location::new(fname, 1)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 2),
Token::Equals,
Location::new(fname, 3)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 4),
Token::var("x"),
Location::new(fname, 5)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 6),
Token::Operator('+'),
Location::new(fname, 7)
)))
);
assert_eq!(
lex0.next(),
Some(Ok((
Location::new(fname, 8),
Token::Number((None, 1)),
Location::new(fname, 9)
)))
);
assert_eq!(lex0.next(), None);
}

View File

@@ -2,11 +2,10 @@ use crate::util::istring::InternedString;
use logos::{Lexer, Logos};
use std::num::ParseIntError;
#[derive(Logos,Clone,Debug,PartialEq)]
#[derive(Logos, Clone, Debug, PartialEq)]
pub enum Token {
#[regex(r"[ \t\n\f]+", logos::skip)]
#[regex(r"//.*", logos::skip)]
#[token("=")]
Equals,
@@ -37,7 +36,10 @@ impl Token {
}
}
fn parse_number<'a,'src>(base: Option<u8>, value: &'a Lexer<'src, Token>) -> Result<(Option<u8>, i128), ParseIntError> {
fn parse_number<'a, 'src>(
base: Option<u8>,
value: &'a Lexer<'src, Token>,
) -> Result<(Option<u8>, i128), ParseIntError> {
let (radix, strval) = match base {
None => (10, value.slice()),
Some(radix) => (radix, &value.slice()[2..]),
@@ -80,4 +82,3 @@ fn lexer_spans() {
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
assert_eq!(lex0.next(), None);
}

View File

@@ -1,5 +1,5 @@
use lazy_static::lazy_static;
use std::cmp::{Ordering, max};
use std::cmp::{max, Ordering};
use std::collections::HashMap;
use std::fmt;
use std::sync::RwLock;
@@ -22,16 +22,14 @@ impl InternedString {
for (k, v) in table.iter() {
if v == s {
return InternedString{ index: *k }
return InternedString { index: *k };
}
biggest_index = max(biggest_index, *k);
}
let res = biggest_index + 1;
table.insert(res, s.to_string());
InternedString {
index: res
}
InternedString { index: res }
}
}