116 lines
3.6 KiB
Rust
116 lines
3.6 KiB
Rust
use internment::ArcIntern;
|
|
use logos::{Lexer, Logos};
|
|
use std::fmt;
|
|
use std::num::ParseIntError;
|
|
use thiserror::Error;
|
|
|
|
#[derive(Logos, Clone, Debug, PartialEq, Eq)]
|
|
pub enum Token {
|
|
#[token("=")]
|
|
Equals,
|
|
|
|
#[token(";")]
|
|
Semi,
|
|
|
|
#[token("print")]
|
|
Print,
|
|
|
|
#[regex(r"[+\-*/]", |v| v.slice().chars().next())]
|
|
Operator(char),
|
|
|
|
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
|
|
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
|
|
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
|
|
#[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))]
|
|
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
|
Number((Option<u8>, i128)),
|
|
|
|
#[regex(r"[a-z][a-zA-Z0-9_]*", |v| ArcIntern::new(v.slice().to_string()))]
|
|
Variable(ArcIntern<String>),
|
|
|
|
#[error]
|
|
#[regex(r"[ \t\r\n\f]+", logos::skip)]
|
|
#[regex(r"//.*", logos::skip)]
|
|
Error,
|
|
}
|
|
|
|
impl fmt::Display for Token {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match self {
|
|
Token::Equals => write!(f, "'='"),
|
|
Token::Semi => write!(f, "';'"),
|
|
Token::Print => write!(f, "'print'"),
|
|
Token::Operator(c) => write!(f, "'{}'", c),
|
|
Token::Number((None, v)) => write!(f, "'{}'", v),
|
|
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v),
|
|
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
|
|
Token::Number((Some(10), v)) => write!(f, "'{}'", v),
|
|
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
|
|
Token::Number((Some(b), v)) => {
|
|
write!(f, "Invalidly-based-number<base={},val={}>", b, v)
|
|
}
|
|
Token::Variable(s) => write!(f, "'{}'", s),
|
|
Token::Error => write!(f, "<error>"),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Error, PartialEq, Eq)]
|
|
pub enum LexerError {
|
|
#[error("Failed lexing at {0}")]
|
|
LexFailure(usize),
|
|
}
|
|
|
|
#[cfg(test)]
impl Token {
    /// Test-only shorthand: builds a `Token::Variable` holding an interned
    /// copy of `s`.
    pub(crate) fn var(s: &str) -> Token {
        let name = String::from(s);
        Self::Variable(ArcIntern::new(name))
    }
}
|
|
|
|
fn parse_number(
|
|
base: Option<u8>,
|
|
value: &Lexer<Token>,
|
|
) -> Result<(Option<u8>, i128), ParseIntError> {
|
|
let (radix, strval) = match base {
|
|
None => (10, value.slice()),
|
|
Some(radix) => (radix, &value.slice()[2..]),
|
|
};
|
|
|
|
let intval = i128::from_str_radix(strval, radix as u32)?;
|
|
Ok((base, intval))
|
|
}
|
|
|
|
/// The number twelve written without a prefix and with each of the `0b`,
/// `0o`, `0d` and `0x` prefixes lexes to a `Number` tagged with the matching
/// base, and the trailing `//` comment yields no token.
#[test]
fn lex_numbers() {
    let tokens: Vec<Token> = Token::lexer("12 0b1100 0o14 0d12 0xc // 9").collect();

    let expected = vec![
        Token::Number((None, 12)),
        Token::Number((Some(2), 12)),
        Token::Number((Some(8), 12)),
        Token::Number((Some(10), 12)),
        Token::Number((Some(16), 12)),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
/// Variables and operators separated by spaces, tabs and newlines lex to the
/// expected sequence, and the trailing `//` comment yields no token.
#[test]
fn lex_symbols() {
    let tokens: Vec<Token> = Token::lexer("x + \t y * \n z // rest").collect();

    let expected = vec![
        Token::var("x"),
        Token::Operator('+'),
        Token::var("y"),
        Token::Operator('*'),
        Token::var("z"),
    ];

    assert_eq!(tokens, expected);
}
|
|
|
|
/// Each token is paired with the byte range it occupies in the input; the
/// skipped whitespace and the trailing `//` comment produce no entries.
#[test]
fn lexer_spans() {
    let spanned: Vec<_> = Token::lexer("y = x + 1//foo").spanned().collect();

    let expected = vec![
        (Token::var("y"), 0..1),
        (Token::Equals, 2..3),
        (Token::var("x"), 4..5),
        (Token::Operator('+'), 6..7),
        (Token::Number((None, 1)), 8..9),
    ];

    assert_eq!(spanned, expected);
}
|