Add type suffixes to numbers in the lexer.
This commit is contained in:
@@ -10,7 +10,7 @@
|
|||||||
//!
|
//!
|
||||||
use crate::syntax::{LexerError, Location};
|
use crate::syntax::{LexerError, Location};
|
||||||
use crate::syntax::ast::{Program,Statement,Expression,Value};
|
use crate::syntax::ast::{Program,Statement,Expression,Value};
|
||||||
use crate::syntax::tokens::Token;
|
use crate::syntax::tokens::{ConstantType, Token};
|
||||||
use internment::ArcIntern;
|
use internment::ArcIntern;
|
||||||
|
|
||||||
// one cool thing about lalrpop: we can pass arguments. in this case, the
|
// one cool thing about lalrpop: we can pass arguments. in this case, the
|
||||||
@@ -44,7 +44,7 @@ extern {
|
|||||||
// to name and use "their value", you get their source location.
|
// to name and use "their value", you get their source location.
|
||||||
// For these, we want "their value" to be their actual contents,
|
// For these, we want "their value" to be their actual contents,
|
||||||
// which is why we put their types in angle brackets.
|
// which is why we put their types in angle brackets.
|
||||||
"<num>" => Token::Number((<Option<u8>>,<i64>)),
|
"<num>" => Token::Number((<Option<u8>>,<Option<ConstantType>>,<i64>)),
|
||||||
"<var>" => Token::Variable(<ArcIntern<String>>),
|
"<var>" => Token::Variable(<ArcIntern<String>>),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -143,7 +143,7 @@ AtomicExpression: Expression = {
|
|||||||
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
|
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
|
||||||
// just a number
|
// just a number
|
||||||
<l:@L> <n:"<num>"> => {
|
<l:@L> <n:"<num>"> => {
|
||||||
let val = Value::Number(n.0, n.1);
|
let val = Value::Number(n.0, n.2);
|
||||||
Expression::Value(Location::new(file_idx, l), val)
|
Expression::Value(Location::new(file_idx, l), val)
|
||||||
},
|
},
|
||||||
// a tricky case: also just a number, but using a negative sign. an
|
// a tricky case: also just a number, but using a negative sign. an
|
||||||
@@ -153,7 +153,7 @@ AtomicExpression: Expression = {
|
|||||||
// write positive numbers which are immediately sent to a negation
|
// write positive numbers which are immediately sent to a negation
|
||||||
// primitive!
|
// primitive!
|
||||||
<l:@L> "-" <n:"<num>"> => {
|
<l:@L> "-" <n:"<num>"> => {
|
||||||
let val = Value::Number(n.0, -n.1);
|
let val = Value::Number(n.0, -n.2);
|
||||||
Expression::Value(Location::new(file_idx, l), val)
|
Expression::Value(Location::new(file_idx, l), val)
|
||||||
},
|
},
|
||||||
// finally, let people parenthesize expressions and get back to a
|
// finally, let people parenthesize expressions and get back to a
|
||||||
|
|||||||
@@ -53,13 +53,14 @@ pub enum Token {
|
|||||||
|
|
||||||
/// Numbers capture both the value we read from the input,
|
/// Numbers capture both the value we read from the input,
|
||||||
/// converted to an `i64`, as well as the base the user used
|
/// converted to an `i64`, as well as the base the user used
|
||||||
/// to write the number, if they did so.
|
/// to write the number and/or the type the user specified,
|
||||||
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
|
/// if they did either.
|
||||||
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
|
#[regex(r"0b[01]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(2), v))]
|
||||||
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
|
#[regex(r"0o[0-7]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(8), v))]
|
||||||
#[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))]
|
#[regex(r"0d[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(10), v))]
|
||||||
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
#[regex(r"0x[0-9a-fA-F]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(16), v))]
|
||||||
Number((Option<u8>, i64)),
|
#[regex(r"[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(None, v))]
|
||||||
|
Number((Option<u8>, Option<ConstantType>, i64)),
|
||||||
|
|
||||||
// Variables; this is a very standard, simple set of characters
|
// Variables; this is a very standard, simple set of characters
|
||||||
// for variables, but feel free to experiment with more complicated
|
// for variables, but feel free to experiment with more complicated
|
||||||
@@ -90,13 +91,25 @@ impl fmt::Display for Token {
|
|||||||
Token::RightParen => write!(f, "')'"),
|
Token::RightParen => write!(f, "')'"),
|
||||||
Token::Print => write!(f, "'print'"),
|
Token::Print => write!(f, "'print'"),
|
||||||
Token::Operator(c) => write!(f, "'{}'", c),
|
Token::Operator(c) => write!(f, "'{}'", c),
|
||||||
Token::Number((None, v)) => write!(f, "'{}'", v),
|
Token::Number((None, otype, v)) => write!(f, "'{}{}'", v, display_optional_type(otype)),
|
||||||
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v),
|
Token::Number((Some(2), otype, v)) => {
|
||||||
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
|
write!(f, "'0b{:b}{}'", v, display_optional_type(otype))
|
||||||
Token::Number((Some(10), v)) => write!(f, "'{}'", v),
|
}
|
||||||
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
|
Token::Number((Some(8), otype, v)) => {
|
||||||
Token::Number((Some(b), v)) => {
|
write!(f, "'0o{:o}{}'", v, display_optional_type(otype))
|
||||||
write!(f, "Invalidly-based-number<base={},val={}>", b, v)
|
}
|
||||||
|
Token::Number((Some(10), otype, v)) => {
|
||||||
|
write!(f, "'{}{}'", v, display_optional_type(otype))
|
||||||
|
}
|
||||||
|
Token::Number((Some(16), otype, v)) => {
|
||||||
|
write!(f, "'0x{:x}{}'", v, display_optional_type(otype))
|
||||||
|
}
|
||||||
|
Token::Number((Some(b), opt_type, v)) => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"Invalidly-based-number<base={},val={},opt_type={:?}>",
|
||||||
|
b, v, opt_type
|
||||||
|
)
|
||||||
}
|
}
|
||||||
Token::Variable(s) => write!(f, "'{}'", s),
|
Token::Variable(s) => write!(f, "'{}'", s),
|
||||||
Token::Error => write!(f, "<error>"),
|
Token::Error => write!(f, "<error>"),
|
||||||
@@ -122,6 +135,18 @@ impl Token {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||||
|
pub enum ConstantType {
|
||||||
|
U8,
|
||||||
|
U16,
|
||||||
|
U32,
|
||||||
|
U64,
|
||||||
|
I8,
|
||||||
|
I16,
|
||||||
|
I32,
|
||||||
|
I64,
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a number in the given base, return a pair of the base and the
|
/// Parse a number in the given base, return the base, the declared type (if any), and the
|
||||||
/// parsed number. This is just a helper used for all of the number
|
/// parsed number. This is just a helper used for all of the number
|
||||||
/// regular expression cases, which kicks off to the obvious Rust
|
/// regular expression cases, which kicks off to the obvious Rust
|
||||||
@@ -129,24 +154,66 @@ impl Token {
|
|||||||
fn parse_number(
|
fn parse_number(
|
||||||
base: Option<u8>,
|
base: Option<u8>,
|
||||||
value: &Lexer<Token>,
|
value: &Lexer<Token>,
|
||||||
) -> Result<(Option<u8>, i64), ParseIntError> {
|
) -> Result<(Option<u8>, Option<ConstantType>, i64), ParseIntError> {
|
||||||
let (radix, strval) = match base {
|
let (radix, strval) = match base {
|
||||||
None => (10, value.slice()),
|
None => (10, value.slice()),
|
||||||
Some(radix) => (radix, &value.slice()[2..]),
|
Some(radix) => (radix, &value.slice()[2..]),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let (declared_type, strval) = if let Some(strval) = strval.strip_suffix("u8") {
|
||||||
|
(Some(ConstantType::U8), strval)
|
||||||
|
} else if let Some(strval) = strval.strip_suffix("u16") {
|
||||||
|
(Some(ConstantType::U16), strval)
|
||||||
|
} else if let Some(strval) = strval.strip_suffix("u32") {
|
||||||
|
(Some(ConstantType::U32), strval)
|
||||||
|
} else if let Some(strval) = strval.strip_suffix("u64") {
|
||||||
|
(Some(ConstantType::U64), strval)
|
||||||
|
} else if let Some(strval) = strval.strip_suffix("i8") {
|
||||||
|
(Some(ConstantType::I8), strval)
|
||||||
|
} else if let Some(strval) = strval.strip_suffix("i16") {
|
||||||
|
(Some(ConstantType::I16), strval)
|
||||||
|
} else if let Some(strval) = strval.strip_suffix("i32") {
|
||||||
|
(Some(ConstantType::I32), strval)
|
||||||
|
} else if let Some(strval) = strval.strip_suffix("i64") {
|
||||||
|
(Some(ConstantType::I64), strval)
|
||||||
|
} else {
|
||||||
|
(None, strval)
|
||||||
|
};
|
||||||
|
|
||||||
let intval = i64::from_str_radix(strval, radix as u32)?;
|
let intval = i64::from_str_radix(strval, radix as u32)?;
|
||||||
Ok((base, intval))
|
Ok((base, declared_type, intval))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn display_optional_type(otype: &Option<ConstantType>) -> &'static str {
|
||||||
|
match otype {
|
||||||
|
None => "",
|
||||||
|
Some(ConstantType::I8) => "i8",
|
||||||
|
Some(ConstantType::I16) => "i16",
|
||||||
|
Some(ConstantType::I32) => "i32",
|
||||||
|
Some(ConstantType::I64) => "i64",
|
||||||
|
Some(ConstantType::U8) => "u8",
|
||||||
|
Some(ConstantType::U16) => "u16",
|
||||||
|
Some(ConstantType::U32) => "u32",
|
||||||
|
Some(ConstantType::U64) => "u64",
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn lex_numbers() {
|
fn lex_numbers() {
|
||||||
let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9");
|
let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc 12u8 0xci64// 9");
|
||||||
assert_eq!(lex0.next(), Some(Token::Number((None, 12))));
|
assert_eq!(lex0.next(), Some(Token::Number((None, None, 12))));
|
||||||
assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12))));
|
assert_eq!(lex0.next(), Some(Token::Number((Some(2), None, 12))));
|
||||||
assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12))));
|
assert_eq!(lex0.next(), Some(Token::Number((Some(8), None, 12))));
|
||||||
assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12))));
|
assert_eq!(lex0.next(), Some(Token::Number((Some(10), None, 12))));
|
||||||
assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12))));
|
assert_eq!(lex0.next(), Some(Token::Number((Some(16), None, 12))));
|
||||||
|
assert_eq!(
|
||||||
|
lex0.next(),
|
||||||
|
Some(Token::Number((None, Some(ConstantType::U8), 12)))
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
lex0.next(),
|
||||||
|
Some(Token::Number((Some(16), Some(ConstantType::I64), 12)))
|
||||||
|
);
|
||||||
assert_eq!(lex0.next(), None);
|
assert_eq!(lex0.next(), None);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -168,6 +235,6 @@ fn lexer_spans() {
|
|||||||
assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
|
assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
|
||||||
assert_eq!(lex0.next(), Some((Token::var("x"), 4..5)));
|
assert_eq!(lex0.next(), Some((Token::var("x"), 4..5)));
|
||||||
assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
|
assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
|
||||||
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
|
assert_eq!(lex0.next(), Some((Token::Number((None, None, 1)), 8..9)));
|
||||||
assert_eq!(lex0.next(), None);
|
assert_eq!(lex0.next(), None);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user