Some basic parsing, with interned strings.
This commit is contained in:
@@ -13,8 +13,9 @@ name = "ngrc"
|
|||||||
path = "src/bin.rs"
|
path = "src/bin.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
logos = "0.11.4"
|
|
||||||
lalrpop-util = "0.19.0"
|
lalrpop-util = "0.19.0"
|
||||||
|
lazy_static = "1.4.0"
|
||||||
|
logos = "0.11.4"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
lalrpop = "0.19.0"
|
lalrpop = "0.19.0"
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
pub mod syntax;
|
pub mod syntax;
|
||||||
|
pub mod util;
|
||||||
|
|||||||
@@ -1,5 +1,40 @@
|
|||||||
//use lalrpop_util::lalrpop_mod;
|
use lalrpop_util::lalrpop_mod;
|
||||||
|
|
||||||
pub mod tokens;
|
mod tokens;
|
||||||
//lalrpop_mod!(pub parser);
|
mod token_stream;
|
||||||
pub mod ast;
|
lalrpop_mod!(parser, "/syntax/parser.rs");
|
||||||
|
mod ast;
|
||||||
|
|
||||||
|
pub use crate::syntax::ast::*;
|
||||||
|
use crate::syntax::parser::ProgramParser;
|
||||||
|
use crate::syntax::tokens::Token;
|
||||||
|
use crate::syntax::token_stream::{LexerError, Location, TokenStream};
|
||||||
|
use lalrpop_util::ParseError;
|
||||||
|
use std::fs;
|
||||||
|
use std::io;
|
||||||
|
|
||||||
|
pub enum ParserError {
|
||||||
|
IOError(io::Error),
|
||||||
|
ParseError(ParseError<Location,Token,LexerError>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<io::Error> for ParserError {
|
||||||
|
fn from(x: io::Error) -> Self {
|
||||||
|
ParserError::IOError(x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lets `?` lift a failure reported by the generated parser into a `ParserError`.
impl From<ParseError<Location, Token, LexerError>> for ParserError {
    fn from(err: ParseError<Location, Token, LexerError>) -> Self {
        Self::ParseError(err)
    }
}
|
||||||
|
|
||||||
|
impl Program {
|
||||||
|
pub fn from_file(filename: &str) -> Result<Program, ParserError> {
|
||||||
|
let metadata = fs::metadata(filename)?;
|
||||||
|
let mut buffer = String::with_capacity(metadata.len() as usize);
|
||||||
|
let lexer = TokenStream::from_file(filename, &mut buffer)?;
|
||||||
|
Ok(ProgramParser::new().parse(lexer)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,12 +1,19 @@
|
|||||||
pub enum Stmt {
|
use crate::syntax::token_stream::Location;
|
||||||
Binding(String, Expr),
|
|
||||||
Expr(Expr),
|
pub struct Program {
|
||||||
|
pub statements: Vec<Statement>,
|
||||||
|
pub result: Expression,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum Expr {
|
pub enum Statement {
|
||||||
Value(Value),
|
Binding(Location, String, Expression),
|
||||||
Reference(String),
|
Expr(Location, Expression),
|
||||||
Primitive(String, Vec<Expr>),
|
}
|
||||||
|
|
||||||
|
pub enum Expression {
|
||||||
|
Value(Location, Value),
|
||||||
|
Reference(Location, String),
|
||||||
|
Primitive(Location, String, Vec<Expression>),
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
|
|||||||
70
src/syntax/parser.lalrpop
Normal file
70
src/syntax/parser.lalrpop
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
use crate::syntax::ast::{Program,Statement,Expression,Value};
|
||||||
|
use crate::syntax::tokens::Token;
|
||||||
|
use crate::syntax::token_stream::{LexerError, Location};
|
||||||
|
use crate::util::istring::InternedString;
|
||||||
|
|
||||||
|
grammar;
|
||||||
|
|
||||||
|
// Hooks the generated parser up to the external (logos-based) lexer: locations
// and lexer errors use the types from `token_stream`, and each terminal used in
// the grammar below is mapped onto a `Token` pattern.
extern {
    type Location = Location;
    type Error = LexerError;

    enum Token {
        // Punctuation.
        "=" => Token::Equals,
        ";" => Token::Semi,

        // Arithmetic operators all share the `Operator` variant.
        "+" => Token::Operator('+'),
        "-" => Token::Operator('-'),
        "*" => Token::Operator('*'),
        "/" => Token::Operator('/'),

        // Terminals that carry a payload into the grammar actions.
        "<num>" => Token::Number((<Option<u8>>,<i128>)),
        "<var>" => Token::Variable(<InternedString>),
    }
}
|
||||||
|
|
||||||
|
// A program is a (possibly empty) run of statements followed by one final
// expression, which becomes the program's `result`.
pub Program: Program = {
    <statements:Statements> <result:Expression> => Program { statements, result }
}
|
||||||
|
|
||||||
|
// Zero or more statements, collected left to right into a vector.
Statements: Vec<Statement> = {
    // Left-recursive case: extend the statements seen so far with the next one.
    <mut so_far:Statements> <next:Statement> => {
        so_far.push(next);
        so_far
    },
    // Base case: no statements yet.
    => {
        Vec::new()
    }
}
|
||||||
|
|
||||||
|
// A statement is either a binding (`name = expr;`) or a bare expression
// terminated by `;`. Both record the location at which the statement starts.
Statement: Statement = {
    <start:@L> <name:"<var>"> "=" <value:Expression> ";" => Statement::Binding(start, name.to_string(), value),
    <start:@L> <value:Expression> ";" => Statement::Expr(start, value),
}
|
||||||
|
|
||||||
|
// Expressions are layered by precedence, loosest-binding tier first:
//
//   Expression -> AdditiveExpression -> MultiplicativeExpression -> AtomicExpression
//
// so `1 + 2 * 3` parses as `1 + (2 * 3)`. Every binary operator is
// left-associative (the recursion is on the left operand).
Expression: Expression = {
    AdditiveExpression
}

// `+` and `-`: the lowest-precedence binary operators. Each operand on the
// right is a full multiplicative expression.
AdditiveExpression: Expression = {
    <l:@L> <e1:AdditiveExpression> "+" <e2:MultiplicativeExpression> => Expression::Primitive(l, "+".to_string(), vec![e1, e2]),
    <l:@L> <e1:AdditiveExpression> "-" <e2:MultiplicativeExpression> => Expression::Primitive(l, "-".to_string(), vec![e1, e2]),
    MultiplicativeExpression,
}

// `*` and `/`: bind tighter than `+`/`-`, so their operands are atoms.
MultiplicativeExpression: Expression = {
    <l:@L> <e1:MultiplicativeExpression> "*" <e2:AtomicExpression> => Expression::Primitive(l, "*".to_string(), vec![e1, e2]),
    <l:@L> <e1:MultiplicativeExpression> "/" <e2:AtomicExpression> => Expression::Primitive(l, "/".to_string(), vec![e1, e2]),
    AtomicExpression,
}
|
||||||
|
|
||||||
|
// The leaves of an expression: a variable reference or a numeric literal.
AtomicExpression: Expression = {
    <start:@L> <name:"<var>"> => Expression::Reference(start, name.to_string()),
    // The lexer hands numbers over as an `(Option<u8>, i128)` pair; both parts
    // are forwarded unchanged into `Value::Number`.
    <start:@L> <num:"<num>"> => Expression::Value(start, Value::Number(num.0, num.1)),
}
|
||||||
99
src/syntax/token_stream.rs
Normal file
99
src/syntax/token_stream.rs
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
use crate::syntax::tokens::Token;
|
||||||
|
use crate::util::istring::InternedString;
|
||||||
|
use logos::{Logos,SpannedIter};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io;
|
||||||
|
use std::io::Read;
|
||||||
|
|
||||||
|
pub struct TokenStream<'s> {
|
||||||
|
filename: InternedString,
|
||||||
|
lexer: SpannedIter<'s, Token>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> TokenStream<'s> {
|
||||||
|
pub fn new(filename: &str, s: &'s str) -> TokenStream<'s> {
|
||||||
|
TokenStream {
|
||||||
|
filename: InternedString::new(filename),
|
||||||
|
lexer: Token::lexer(s).spanned()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_file(filename: &str, buffer: &'s mut String) -> io::Result<TokenStream<'s>> {
|
||||||
|
let mut file = File::open(filename)?;
|
||||||
|
file.read_to_string(buffer)?;
|
||||||
|
Ok(TokenStream::new(filename, buffer))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone,Debug,PartialEq)]
|
||||||
|
pub enum Location {
|
||||||
|
InFile(InternedString, usize),
|
||||||
|
Manufactured
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Location {
|
||||||
|
fn new(filename: InternedString, offset: usize) -> Location {
|
||||||
|
Location::InFile(filename, offset)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Location {
|
||||||
|
fn default() -> Self {
|
||||||
|
Location::Manufactured
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug,PartialEq)]
|
||||||
|
pub struct LexerError {
|
||||||
|
filename: InternedString,
|
||||||
|
offset: usize
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
impl LexerError {
    /// Test-only constructor, used to build the expected error in assertions.
    fn new(filename: InternedString, offset: usize) -> LexerError {
        Self { filename, offset }
    }
}
|
||||||
|
|
||||||
|
/// Item type produced by `TokenStream`: either a `(start, token, end)` triple
/// or the lexer error raised at that point in the input.
type LocatedToken = Result<(Location, Token, Location),LexerError>;
|
||||||
|
|
||||||
|
impl<'s> Iterator for TokenStream<'s> {
|
||||||
|
type Item = LocatedToken;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.lexer.next() {
|
||||||
|
None => None,
|
||||||
|
Some((Token::Error, span)) => {
|
||||||
|
Some(Err(LexerError {
|
||||||
|
filename: self.filename,
|
||||||
|
offset: span.start,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
Some((token, span)) => {
|
||||||
|
let start = Location::new(self.filename, span.start);
|
||||||
|
let end = Location::new(self.filename, span.end);
|
||||||
|
Some(Ok((start, token, end)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Each token of a small binding comes out with the expected start and end
/// offsets, and the stream ends after the last token.
#[test]
fn stream_works() {
    let fname = InternedString::new("<file>");
    let at = |offset: usize| Location::new(fname, offset);
    let mut stream = TokenStream::new("<file>", "y = x + 1//foo");

    // (start offset, token, end offset), in source order.
    let expected = vec![
        (0, Token::var("y"), 1),
        (2, Token::Equals, 3),
        (4, Token::var("x"), 5),
        (6, Token::Operator('+'), 7),
        (8, Token::Number((None, 1)), 9),
    ];
    for (start, token, end) in expected {
        assert_eq!(stream.next(), Some(Ok((at(start), token, at(end)))));
    }

    // Nothing is produced for the trailing `//foo`.
    assert_eq!(stream.next(), None);
}
|
||||||
|
|
||||||
|
/// Input the lexer cannot tokenize is reported as a `LexerError` at offset 0.
#[test]
fn errors_work() {
    let source_name = InternedString::new("<file>");
    let mut stream = TokenStream::new("<file>", "\u{2639}");

    let expected = LexerError::new(source_name, 0);
    assert_eq!(stream.next(), Some(Err(expected)));
}
|
||||||
@@ -1,14 +1,18 @@
|
|||||||
|
use crate::util::istring::InternedString;
|
||||||
use logos::{Lexer, Logos};
|
use logos::{Lexer, Logos};
|
||||||
use std::num::ParseIntError;
|
use std::num::ParseIntError;
|
||||||
|
|
||||||
#[derive(Logos,Debug,PartialEq)]
|
#[derive(Logos,Clone,Debug,PartialEq)]
|
||||||
enum Token<'src> {
|
pub enum Token {
|
||||||
#[regex(r"[ \t\n\f]+", logos::skip)]
|
#[regex(r"[ \t\n\f]+", logos::skip)]
|
||||||
#[regex(r"//.*", logos::skip)]
|
#[regex(r"//.*", logos::skip)]
|
||||||
|
|
||||||
#[token("=")]
|
#[token("=")]
|
||||||
Equals,
|
Equals,
|
||||||
|
|
||||||
|
#[token(";")]
|
||||||
|
Semi,
|
||||||
|
|
||||||
#[regex(r"[+\-*/]", |v| v.slice().chars().nth(0))]
|
#[regex(r"[+\-*/]", |v| v.slice().chars().nth(0))]
|
||||||
Operator(char),
|
Operator(char),
|
||||||
|
|
||||||
@@ -19,14 +23,21 @@ enum Token<'src> {
|
|||||||
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
||||||
Number((Option<u8>, i128)),
|
Number((Option<u8>, i128)),
|
||||||
|
|
||||||
#[regex(r"[a-z][a-zA-Z0-9_]*")]
|
#[regex(r"[a-z][a-zA-Z0-9_]*", |v| InternedString::new(v.slice()))]
|
||||||
Variable(&'src str),
|
Variable(InternedString),
|
||||||
|
|
||||||
#[error]
|
#[error]
|
||||||
Error,
|
Error,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_number<'a,'src>(base: Option<u8>, value: &'a Lexer<'src, Token<'src>>) -> Result<(Option<u8>, i128), ParseIntError> {
|
#[cfg(test)]
impl Token {
    /// Test helper: builds a `Token::Variable` by interning `s`.
    pub(crate) fn var(s: &str) -> Token {
        let name = InternedString::new(s);
        Token::Variable(name)
    }
}
|
||||||
|
|
||||||
|
fn parse_number<'a,'src>(base: Option<u8>, value: &'a Lexer<'src, Token>) -> Result<(Option<u8>, i128), ParseIntError> {
|
||||||
let (radix, strval) = match base {
|
let (radix, strval) = match base {
|
||||||
None => (10, value.slice()),
|
None => (10, value.slice()),
|
||||||
Some(radix) => (radix, &value.slice()[2..]),
|
Some(radix) => (radix, &value.slice()[2..]),
|
||||||
@@ -51,20 +62,20 @@ fn lex_numbers() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn lex_symbols() {
|
fn lex_symbols() {
|
||||||
let mut lex0 = Token::lexer("x + \t y * \n z // rest");
|
let mut lex0 = Token::lexer("x + \t y * \n z // rest");
|
||||||
assert_eq!(lex0.next(), Some(Token::Variable("x")));
|
assert_eq!(lex0.next(), Some(Token::var("x")));
|
||||||
assert_eq!(lex0.next(), Some(Token::Operator('+')));
|
assert_eq!(lex0.next(), Some(Token::Operator('+')));
|
||||||
assert_eq!(lex0.next(), Some(Token::Variable("y")));
|
assert_eq!(lex0.next(), Some(Token::var("y")));
|
||||||
assert_eq!(lex0.next(), Some(Token::Operator('*')));
|
assert_eq!(lex0.next(), Some(Token::Operator('*')));
|
||||||
assert_eq!(lex0.next(), Some(Token::Variable("z")));
|
assert_eq!(lex0.next(), Some(Token::var("z")));
|
||||||
assert_eq!(lex0.next(), None);
|
assert_eq!(lex0.next(), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn lexer_spans() {
|
fn lexer_spans() {
|
||||||
let mut lex0 = Token::lexer("y = x + 1//foo").spanned();
|
let mut lex0 = Token::lexer("y = x + 1//foo").spanned();
|
||||||
assert_eq!(lex0.next(), Some((Token::Variable("y"), 0..1)));
|
assert_eq!(lex0.next(), Some((Token::var("y"), 0..1)));
|
||||||
assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
|
assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
|
||||||
assert_eq!(lex0.next(), Some((Token::Variable("x"), 4..5)));
|
assert_eq!(lex0.next(), Some((Token::var("x"), 4..5)));
|
||||||
assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
|
assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
|
||||||
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
|
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
|
||||||
assert_eq!(lex0.next(), None);
|
assert_eq!(lex0.next(), None);
|
||||||
|
|||||||
1
src/util.rs
Normal file
1
src/util.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
pub mod istring;
|
||||||
68
src/util/istring.rs
Normal file
68
src/util/istring.rs
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
use lazy_static::lazy_static;
|
||||||
|
use std::cmp::{Ordering, max};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fmt;
|
||||||
|
use std::sync::RwLock;
|
||||||
|
|
||||||
|
// Global intern table mapping each handed-out index to the string it stands
// for. Guarded by a `RwLock`: interning takes the write lock, while formatting
// and ordering only take the read lock.
lazy_static! {
    static ref STRING_TABLE: RwLock<HashMap<u64, String>> = RwLock::new(HashMap::new());
}
|
||||||
|
|
||||||
|
/// A small, copyable handle to a string stored in the global intern table.
///
/// Equality is derived, so two handles are equal exactly when they hold the
/// same index.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct InternedString {
    /// Key of this string in `STRING_TABLE`.
    index: u64,
}
|
||||||
|
|
||||||
|
impl InternedString {
|
||||||
|
/// Return the `InternedString` equivalent of the provided string. This function is slow, and
|
||||||
|
/// should be used somewhat sparingly.
|
||||||
|
pub fn new(s: &str) -> Self {
|
||||||
|
let mut biggest_index = 0;
|
||||||
|
let mut table = STRING_TABLE.write().unwrap();
|
||||||
|
|
||||||
|
for (k, v) in table.iter() {
|
||||||
|
if v == s {
|
||||||
|
return InternedString{ index: *k }
|
||||||
|
}
|
||||||
|
biggest_index = max(biggest_index, *k);
|
||||||
|
}
|
||||||
|
|
||||||
|
let res = biggest_index + 1;
|
||||||
|
table.insert(res, s.to_string());
|
||||||
|
InternedString {
|
||||||
|
index: res
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for InternedString {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match STRING_TABLE.read().unwrap().get(&self.index) {
|
||||||
|
None => write!(f, "<BROKEN-INTERN>"),
|
||||||
|
Some(x) => write!(f, "{:?}", x),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for InternedString {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match STRING_TABLE.read().unwrap().get(&self.index) {
|
||||||
|
None => write!(f, "<BROKEN-INTERN>"),
|
||||||
|
Some(x) => write!(f, "{}", x),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd<InternedString> for InternedString {
|
||||||
|
fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
|
||||||
|
let table = STRING_TABLE.read().unwrap();
|
||||||
|
|
||||||
|
if let Some(me) = table.get(&self.index) {
|
||||||
|
if let Some(them) = table.get(&other.index) {
|
||||||
|
return me.partial_cmp(them);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user