From c0b65ef416a766b06ac0a2af465f34aa928e5d47 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Fri, 12 May 2023 17:22:33 -0700 Subject: [PATCH] Add type suffixes to numbers in the lexer. --- src/syntax/parser.lalrpop | 8 +-- src/syntax/tokens.rs | 113 ++++++++++++++++++++++++++++++-------- 2 files changed, 94 insertions(+), 27 deletions(-) diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 3d8de29..b6ef647 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -10,7 +10,7 @@ //! use crate::syntax::{LexerError, Location}; use crate::syntax::ast::{Program,Statement,Expression,Value}; -use crate::syntax::tokens::Token; +use crate::syntax::tokens::{ConstantType, Token}; use internment::ArcIntern; // one cool thing about lalrpop: we can pass arguments. in this case, the @@ -44,7 +44,7 @@ extern { // to name and use "their value", you get their source location. // For these, we want "their value" to be their actual contents, // which is why we put their types in angle brackets. - "" => Token::Number((>,)), + "" => Token::Number((>,>,)), "" => Token::Variable(>), } } @@ -143,7 +143,7 @@ AtomicExpression: Expression = { "> => Expression::Reference(Location::new(file_idx, l), v.to_string()), // just a number "> => { - let val = Value::Number(n.0, n.1); + let val = Value::Number(n.0, n.2); Expression::Value(Location::new(file_idx, l), val) }, // a tricky case: also just a number, but using a negative sign. an @@ -153,7 +153,7 @@ AtomicExpression: Expression = { // write positive numbers which are immediately sent to a negation // primitive! "-" "> => { - let val = Value::Number(n.0, -n.1); + let val = Value::Number(n.0, -n.2); Expression::Value(Location::new(file_idx, l), val) }, // finally, let people parenthesize expressions and get back to a diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index e20757d..8b6efd1 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -53,13 +53,14 @@ pub enum Token { /// Numbers capture both the value we read from the input, /// converted to an `i64`, as well as the base the user used - /// to write the number, if they did so. - #[regex(r"0b[01]+", |v| parse_number(Some(2), v))] - #[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))] - #[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))] - #[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))] - #[regex(r"[0-9]+", |v| parse_number(None, v))] - Number((Option, i64)), + /// to write the number and/or the type the user specified, + /// if they did either. + #[regex(r"0b[01]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(2), v))] + #[regex(r"0o[0-7]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(8), v))] + #[regex(r"0d[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(10), v))] + #[regex(r"0x[0-9a-fA-F]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(16), v))] + #[regex(r"[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(None, v))] + Number((Option, Option, i64)), // Variables; this is a very standard, simple set of characters // for variables, but feel free to experiment with more complicated @@ -90,13 +91,25 @@ impl fmt::Display for Token { Token::RightParen => write!(f, "')'"), Token::Print => write!(f, "'print'"), Token::Operator(c) => write!(f, "'{}'", c), - Token::Number((None, v)) => write!(f, "'{}'", v), - Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v), - Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v), - Token::Number((Some(10), v)) => write!(f, "'{}'", v), - Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v), - Token::Number((Some(b), v)) => { - write!(f, "Invalidly-based-number", b, v) + Token::Number((None, otype, v)) => write!(f, "'{}{}'", v, display_optional_type(otype)), + Token::Number((Some(2), otype, v)) => { + write!(f, "'0b{:b}{}'", v, display_optional_type(otype)) + } + Token::Number((Some(8), otype, v)) => { + write!(f, "'0o{:o}{}'", v, display_optional_type(otype)) + } + Token::Number((Some(10), otype, v)) => { + write!(f, "'{}{}'", v, display_optional_type(otype)) + } + Token::Number((Some(16), otype, v)) => { + write!(f, "'0x{:x}{}'", v, display_optional_type(otype)) + } + Token::Number((Some(b), opt_type, v)) => { + write!( + f, + "Invalidly-based-number", + b, v, opt_type + ) } Token::Variable(s) => write!(f, "'{}'", s), Token::Error => write!(f, ""), @@ -122,6 +135,18 @@ impl Token { } } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ConstantType { + U8, + U16, + U32, + U64, + I8, + I16, + I32, + I64, +} + /// Parse a number in the given base, return a pair of the base and the /// parsed number. This is just a helper used for all of the number /// regular expression cases, which kicks off to the obvious Rust @@ -129,24 +154,66 @@ impl Token { fn parse_number( base: Option, value: &Lexer, -) -> Result<(Option, i64), ParseIntError> { +) -> Result<(Option, Option, i64), ParseIntError> { let (radix, strval) = match base { None => (10, value.slice()), Some(radix) => (radix, &value.slice()[2..]), }; + let (declared_type, strval) = if let Some(strval) = strval.strip_suffix("u8") { + (Some(ConstantType::U8), strval) + } else if let Some(strval) = strval.strip_suffix("u16") { + (Some(ConstantType::U16), strval) + } else if let Some(strval) = strval.strip_suffix("u32") { + (Some(ConstantType::U32), strval) + } else if let Some(strval) = strval.strip_suffix("u64") { + (Some(ConstantType::U64), strval) + } else if let Some(strval) = strval.strip_suffix("i8") { + (Some(ConstantType::I8), strval) + } else if let Some(strval) = strval.strip_suffix("i16") { + (Some(ConstantType::I16), strval) + } else if let Some(strval) = strval.strip_suffix("i32") { + (Some(ConstantType::I32), strval) + } else if let Some(strval) = strval.strip_suffix("i64") { + (Some(ConstantType::I64), strval) + } else { + (None, strval) + }; + let intval = i64::from_str_radix(strval, radix as u32)?; - Ok((base, intval)) + Ok((base, declared_type, intval)) +} + +fn display_optional_type(otype: &Option) -> &'static str { + match otype { + None => "", + Some(ConstantType::I8) => "i8", + Some(ConstantType::I16) => "i16", + Some(ConstantType::I32) => "i32", + Some(ConstantType::I64) => "i64", + Some(ConstantType::U8) => "u8", + Some(ConstantType::U16) => "u16", + Some(ConstantType::U32) => "u32", + Some(ConstantType::U64) => "u64", + } } #[test] fn lex_numbers() { - let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9"); - assert_eq!(lex0.next(), Some(Token::Number((None, 12)))); - assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12)))); - assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12)))); - assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12)))); - assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12)))); + let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc 12u8 0xci64// 9"); + assert_eq!(lex0.next(), Some(Token::Number((None, None, 12)))); + assert_eq!(lex0.next(), Some(Token::Number((Some(2), None, 12)))); + assert_eq!(lex0.next(), Some(Token::Number((Some(8), None, 12)))); + assert_eq!(lex0.next(), Some(Token::Number((Some(10), None, 12)))); + assert_eq!(lex0.next(), Some(Token::Number((Some(16), None, 12)))); + assert_eq!( + lex0.next(), + Some(Token::Number((None, Some(ConstantType::U8), 12))) + ); + assert_eq!( + lex0.next(), + Some(Token::Number((Some(16), Some(ConstantType::I64), 12))) + ); assert_eq!(lex0.next(), None); } @@ -168,6 +235,6 @@ fn lexer_spans() { assert_eq!(lex0.next(), Some((Token::Equals, 2..3))); assert_eq!(lex0.next(), Some((Token::var("x"), 4..5))); assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7))); - assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9))); + assert_eq!(lex0.next(), Some((Token::Number((None, None, 1)), 8..9))); assert_eq!(lex0.next(), None); }