use internment::ArcIntern; use logos::{Lexer, Logos}; use std::fmt; use std::num::ParseIntError; use thiserror::Error; #[derive(Logos, Clone, Debug, PartialEq, Eq)] pub enum Token { #[token("=")] Equals, #[token(";")] Semi, #[token("print")] Print, #[regex(r"[+\-*/]", |v| v.slice().chars().next())] Operator(char), #[regex(r"0b[01]+", |v| parse_number(Some(2), v))] #[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))] #[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))] #[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))] #[regex(r"[0-9]+", |v| parse_number(None, v))] Number((Option, i64)), #[regex(r"[a-z][a-zA-Z0-9_]*", |v| ArcIntern::new(v.slice().to_string()))] Variable(ArcIntern), #[error] #[regex(r"[ \t\r\n\f]+", logos::skip)] #[regex(r"//.*", logos::skip)] Error, } impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Token::Equals => write!(f, "'='"), Token::Semi => write!(f, "';'"), Token::Print => write!(f, "'print'"), Token::Operator(c) => write!(f, "'{}'", c), Token::Number((None, v)) => write!(f, "'{}'", v), Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v), Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v), Token::Number((Some(10), v)) => write!(f, "'{}'", v), Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v), Token::Number((Some(b), v)) => { write!(f, "Invalidly-based-number", b, v) } Token::Variable(s) => write!(f, "'{}'", s), Token::Error => write!(f, ""), } } } #[derive(Debug, Error, PartialEq, Eq)] pub enum LexerError { #[error("Failed lexing at {0}")] LexFailure(usize), } #[cfg(test)] impl Token { pub(crate) fn var(s: &str) -> Token { Token::Variable(ArcIntern::new(s.to_string())) } } fn parse_number( base: Option, value: &Lexer, ) -> Result<(Option, i64), ParseIntError> { let (radix, strval) = match base { None => (10, value.slice()), Some(radix) => (radix, &value.slice()[2..]), }; let intval = i64::from_str_radix(strval, radix as u32)?; Ok((base, intval)) } #[test] fn lex_numbers() { let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9"); assert_eq!(lex0.next(), Some(Token::Number((None, 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12)))); assert_eq!(lex0.next(), None); } #[test] fn lex_symbols() { let mut lex0 = Token::lexer("x + \t y * \n z // rest"); assert_eq!(lex0.next(), Some(Token::var("x"))); assert_eq!(lex0.next(), Some(Token::Operator('+'))); assert_eq!(lex0.next(), Some(Token::var("y"))); assert_eq!(lex0.next(), Some(Token::Operator('*'))); assert_eq!(lex0.next(), Some(Token::var("z"))); assert_eq!(lex0.next(), None); } #[test] fn lexer_spans() { let mut lex0 = Token::lexer("y = x + 1//foo").spanned(); assert_eq!(lex0.next(), Some((Token::var("y"), 0..1))); assert_eq!(lex0.next(), Some((Token::Equals, 2..3))); assert_eq!(lex0.next(), Some((Token::var("x"), 4..5))); assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7))); assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9))); assert_eq!(lex0.next(), None); }