🤷 The initial version of the compiler, both static and JIT.
This implements a full compiler, with both static compilation and JIT support, for the world's simplest and silliest programming language. You can do math and print variables. That's it. On the bright side, it implements every part of the compiler: from the lexer and parser, through analysis and simplification, and into a reasonable code generator. This should be a good jumping-off point for adding more advanced features. Tests, including proptests, are included to help avoid regressions.
This commit is contained in:
123
src/syntax/tokens.rs
Normal file
123
src/syntax/tokens.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use internment::ArcIntern;
|
||||
use logos::{Lexer, Logos};
|
||||
use std::fmt;
|
||||
use std::num::ParseIntError;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Logos, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Token {
|
||||
#[token("=")]
|
||||
Equals,
|
||||
|
||||
#[token(";")]
|
||||
Semi,
|
||||
|
||||
#[token("(")]
|
||||
LeftParen,
|
||||
|
||||
#[token(")")]
|
||||
RightParen,
|
||||
|
||||
#[token("print")]
|
||||
Print,
|
||||
|
||||
#[regex(r"[+\-*/]", |v| v.slice().chars().next())]
|
||||
Operator(char),
|
||||
|
||||
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
|
||||
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
|
||||
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
|
||||
#[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))]
|
||||
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
||||
Number((Option<u8>, i64)),
|
||||
|
||||
#[regex(r"[a-z][a-zA-Z0-9_]*", |v| ArcIntern::new(v.slice().to_string()))]
|
||||
Variable(ArcIntern<String>),
|
||||
|
||||
#[error]
|
||||
#[regex(r"[ \t\r\n\f]+", logos::skip)]
|
||||
#[regex(r"//.*", logos::skip)]
|
||||
Error,
|
||||
}
|
||||
|
||||
impl fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Token::Equals => write!(f, "'='"),
|
||||
Token::Semi => write!(f, "';'"),
|
||||
Token::LeftParen => write!(f, "'('"),
|
||||
Token::RightParen => write!(f, "')'"),
|
||||
Token::Print => write!(f, "'print'"),
|
||||
Token::Operator(c) => write!(f, "'{}'", c),
|
||||
Token::Number((None, v)) => write!(f, "'{}'", v),
|
||||
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v),
|
||||
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
|
||||
Token::Number((Some(10), v)) => write!(f, "'{}'", v),
|
||||
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
|
||||
Token::Number((Some(b), v)) => {
|
||||
write!(f, "Invalidly-based-number<base={},val={}>", b, v)
|
||||
}
|
||||
Token::Variable(s) => write!(f, "'{}'", s),
|
||||
Token::Error => write!(f, "<error>"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, PartialEq, Eq)]
|
||||
pub enum LexerError {
|
||||
#[error("Failed lexing at {0}")]
|
||||
LexFailure(usize),
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl Token {
    /// Test-only shorthand: builds a `Token::Variable` from a string slice by
    /// interning an owned copy of `s`.
    pub(crate) fn var(s: &str) -> Token {
        Token::Variable(ArcIntern::new(s.to_owned()))
    }
}
|
||||
|
||||
fn parse_number(
|
||||
base: Option<u8>,
|
||||
value: &Lexer<Token>,
|
||||
) -> Result<(Option<u8>, i64), ParseIntError> {
|
||||
let (radix, strval) = match base {
|
||||
None => (10, value.slice()),
|
||||
Some(radix) => (radix, &value.slice()[2..]),
|
||||
};
|
||||
|
||||
let intval = i64::from_str_radix(strval, radix as u32)?;
|
||||
Ok((base, intval))
|
||||
}
|
||||
|
||||
// The value twelve written unprefixed and with each base prefix lexes to a
// `Number` carrying the matching base tag; the trailing comment yields nothing.
#[test]
fn lex_numbers() {
    let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9");
    assert_eq!(lex0.next(), Some(Token::Number((None, 12))));
    assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12))));
    assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12))));
    assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12))));
    assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12))));
    assert_eq!(lex0.next(), None);
}
|
||||
|
||||
// Variables and operators are lexed in order while spaces, tabs, newlines and
// the trailing comment are skipped.
#[test]
fn lex_symbols() {
    let mut lex0 = Token::lexer("x + \t y * \n z // rest");
    assert_eq!(lex0.next(), Some(Token::var("x")));
    assert_eq!(lex0.next(), Some(Token::Operator('+')));
    assert_eq!(lex0.next(), Some(Token::var("y")));
    assert_eq!(lex0.next(), Some(Token::Operator('*')));
    assert_eq!(lex0.next(), Some(Token::var("z")));
    assert_eq!(lex0.next(), None);
}
|
||||
|
||||
// Each token is reported with the byte range it occupies in the input; the
// comment glued directly onto `1` is skipped and does not extend that span.
#[test]
fn lexer_spans() {
    let mut lex0 = Token::lexer("y = x + 1//foo").spanned();
    assert_eq!(lex0.next(), Some((Token::var("y"), 0..1)));
    assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
    assert_eq!(lex0.next(), Some((Token::var("x"), 4..5)));
    assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
    assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
    assert_eq!(lex0.next(), None);
}
|
||||
Reference in New Issue
Block a user