commit 81f98cc2c9c73b9731b36f6c76fe85b774adcdd5
Author: Adam Wick
Date:   Sat Aug 1 20:45:33 2020 -0700

    Initial commit; a basic Logos lexer and some tests.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96ef6c0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..ada12c3
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "ngr"
+version = "0.1.0"
+authors = ["awick"]
+edition = "2018"
+
+[lib]
+name = "ngr"
+path = "src/lib.rs"
+
+[[bin]]
+name = "ngrc"
+path = "src/bin.rs"
+
+[dependencies]
+logos = "0.11.4"
+lalrpop-util = "0.19.0"
+
+[build-dependencies]
+lalrpop = "0.19.0"
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..23c7d3f
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,5 @@
+extern crate lalrpop;
+
+fn main() {
+    lalrpop::process_root().unwrap();
+}
diff --git a/src/bin.rs b/src/bin.rs
new file mode 100644
index 0000000..e7a11a9
--- /dev/null
+++ b/src/bin.rs
@@ -0,0 +1,3 @@
+fn main() {
+    println!("Hello, world!");
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..4a39d2c
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1 @@
+pub mod syntax;
diff --git a/src/syntax.rs b/src/syntax.rs
new file mode 100644
index 0000000..f13fc07
--- /dev/null
+++ b/src/syntax.rs
@@ -0,0 +1,5 @@
+//use lalrpop_util::lalrpop_mod;
+
+pub mod tokens;
+//lalrpop_mod!(pub parser);
+pub mod ast;
diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs
new file mode 100644
index 0000000..85192a5
--- /dev/null
+++ b/src/syntax/ast.rs
@@ -0,0 +1,26 @@
+/// A top-level statement.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Stmt {
+    /// Binds the result of an expression to a name.
+    Binding(String, Expr),
+    /// A bare expression used as a statement.
+    Expr(Expr),
+}
+
+/// An expression.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Expr {
+    /// A literal value.
+    Value(Value),
+    /// A reference to a name.
+    Reference(String),
+    /// A named primitive together with its argument expressions.
+    Primitive(String, Vec<Expr>),
+}
+
+/// A literal value.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Value {
+    /// An integer; the first field presumably mirrors the base tag of `Token::Number`.
+    Number(Option<u8>, i128),
+}
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
new file mode 100644
index 0000000..2e2711d
--- /dev/null
+++ b/src/syntax/tokens.rs
@@ -0,0 +1,97 @@
+use logos::{Lexer, Logos};
+use std::num::ParseIntError;
+
+/// Tokens produced by the lexer.
+///
+/// Whitespace and `//` line comments are skipped and never surface as tokens.
+#[derive(Logos, Debug, PartialEq)]
+enum Token<'src> {
+    // The two skip patterns sit on this variant only because an attribute needs a variant
+    // to attach to; `logos::skip` means they never actually yield `Equals`.
+    #[regex(r"[ \t\n\f]+", logos::skip)]
+    #[regex(r"//.*", logos::skip)]
+    /// The `=` sign.
+    #[token("=")]
+    Equals,
+
+    /// One of the operator characters `+`, `-`, `*` or `/`.
+    #[regex(r"[+\-*/]", |lex| lex.slice().chars().next())]
+    Operator(char),
+
+    /// An integer literal: the base named by a `0b`/`0o`/`0d`/`0x` prefix (`None` when the
+    /// literal has no prefix) paired with the parsed value.
+    #[regex(r"0b[01]+", |lex| parse_number(Some(2), lex))]
+    #[regex(r"0o[0-7]+", |lex| parse_number(Some(8), lex))]
+    #[regex(r"0d[0-9]+", |lex| parse_number(Some(10), lex))]
+    #[regex(r"0x[0-9a-fA-F]+", |lex| parse_number(Some(16), lex))]
+    #[regex(r"[0-9]+", |lex| parse_number(None, lex))]
+    Number((Option<u8>, i128)),
+
+    /// A name: a lowercase ASCII letter followed by ASCII letters, digits or underscores.
+    #[regex(r"[a-z][a-zA-Z0-9_]*")]
+    Variable(&'src str),
+
+    /// Input that no other pattern accepts.
+    #[error]
+    Error,
+}
+
+/// Parses the digits of the literal most recently matched by the lexer.
+///
+/// `base` is `Some(radix)` when the literal starts with a two-character base prefix, which is
+/// dropped before parsing, and `None` for a bare literal, which is read as decimal. The
+/// returned pair echoes `base` unchanged alongside the parsed value.
+///
+/// # Errors
+///
+/// Propagates the `ParseIntError` from `i128::from_str_radix`, for example on overflow.
+///
+/// # Panics
+///
+/// Panics if `base` holds a radix outside `2..=36`; the patterns on `Token` never pass one.
+fn parse_number<'src>(
+    base: Option<u8>,
+    value: &Lexer<'src, Token<'src>>,
+) -> Result<(Option<u8>, i128), ParseIntError> {
+    let (radix, digits) = match base {
+        None => (10, value.slice()),
+        // Each prefixed pattern begins with a two-byte ASCII tag such as `0x`.
+        Some(radix) => (radix, &value.slice()[2..]),
+    };
+
+    let parsed = i128::from_str_radix(digits, u32::from(radix))?;
+    Ok((base, parsed))
+}
+
+#[test]
+fn lex_numbers() {
+    let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9");
+    assert_eq!(lex0.next(), Some(Token::Number((None, 12))));
+    assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12))));
+    assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12))));
+    assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12))));
+    assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12))));
+    assert_eq!(lex0.next(), None);
+}
+
+#[test]
+fn lex_symbols() {
+    let mut lex0 = Token::lexer("x + \t y * \n z // rest");
+    assert_eq!(lex0.next(), Some(Token::Variable("x")));
+    assert_eq!(lex0.next(), Some(Token::Operator('+')));
+    assert_eq!(lex0.next(), Some(Token::Variable("y")));
+    assert_eq!(lex0.next(), Some(Token::Operator('*')));
+    assert_eq!(lex0.next(), Some(Token::Variable("z")));
+    assert_eq!(lex0.next(), None);
+}
+
+#[test]
+fn lexer_spans() {
+    let mut lex0 = Token::lexer("y = x + 1//foo").spanned();
+    assert_eq!(lex0.next(), Some((Token::Variable("y"), 0..1)));
+    assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
+    assert_eq!(lex0.next(), Some((Token::Variable("x"), 4..5)));
+    assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
+    assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
+    assert_eq!(lex0.next(), None);
+}