🤔 Add a type inference engine, along with typed literals. (#4)

The typed literal formatting mirrors that of Rust. If no type can be
inferred for an untagged literal, the type inference engine will warn
the user and then assume that they meant an unsigned 64-bit number.
(This is slightly inconvenient, because there can be cases in which our
Arbitrary instance may generate a unary negation, in which case we
should assume that it's a signed 64-bit number; we may want to revisit
this later.)

The type inference engine is a standard two-phase one, in which we first
generate a series of type constraints, and then we solve those
constraints. In this particular implementation, we actually use a third
phase to generate a final AST.

Finally, to increase the amount of testing performed, I've removed the
overflow checking in the evaluator. The only thing we now check for is
division by zero. This does make things a trifle slower in testing, but
hopefully we get more coverage this way.
This commit was merged in pull request #4.
This commit is contained in:
2023-09-19 20:40:05 -07:00
committed by GitHub
parent 1fbfd0c2d2
commit bd3b9af469
44 changed files with 3258 additions and 702 deletions

View File

@@ -40,6 +40,12 @@ pub enum Token {
#[token(")")]
RightParen,
#[token("<")]
LessThan,
#[token(">")]
GreaterThan,
// Next we take care of any reserved words; I always like to put
// these before we start recognizing more complicated regular
// expressions. I don't think it matters, but it works for me.
@@ -53,13 +59,14 @@ pub enum Token {
/// Numbers capture both the value we read from the input,
/// converted to an `i64`, as well as the base the user used
/// to write the number, if they did so.
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
#[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))]
#[regex(r"[0-9]+", |v| parse_number(None, v))]
Number((Option<u8>, i64)),
/// to write the number and/or the type the user specified,
/// if they did either.
#[regex(r"0b[01]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(2), v))]
#[regex(r"0o[0-7]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(8), v))]
#[regex(r"0d[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(10), v))]
#[regex(r"0x[0-9a-fA-F]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(16), v))]
#[regex(r"[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(None, v))]
Number((Option<u8>, Option<ConstantType>, u64)),
// Variables; this is a very standard, simple set of characters
// for variables, but feel free to experiment with more complicated
@@ -88,15 +95,29 @@ impl fmt::Display for Token {
Token::Semi => write!(f, "';'"),
Token::LeftParen => write!(f, "'('"),
Token::RightParen => write!(f, "')'"),
Token::LessThan => write!(f, "<"),
Token::GreaterThan => write!(f, ">"),
Token::Print => write!(f, "'print'"),
Token::Operator(c) => write!(f, "'{}'", c),
Token::Number((None, v)) => write!(f, "'{}'", v),
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v),
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
Token::Number((Some(10), v)) => write!(f, "'{}'", v),
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
Token::Number((Some(b), v)) => {
write!(f, "Invalidly-based-number<base={},val={}>", b, v)
Token::Number((None, otype, v)) => write!(f, "'{}{}'", v, display_optional_type(otype)),
Token::Number((Some(2), otype, v)) => {
write!(f, "'0b{:b}{}'", v, display_optional_type(otype))
}
Token::Number((Some(8), otype, v)) => {
write!(f, "'0o{:o}{}'", v, display_optional_type(otype))
}
Token::Number((Some(10), otype, v)) => {
write!(f, "'{}{}'", v, display_optional_type(otype))
}
Token::Number((Some(16), otype, v)) => {
write!(f, "'0x{:x}{}'", v, display_optional_type(otype))
}
Token::Number((Some(b), opt_type, v)) => {
write!(
f,
"Invalidly-based-number<base={},val={},opt_type={:?}>",
b, v, opt_type
)
}
Token::Variable(s) => write!(f, "'{}'", s),
Token::Error => write!(f, "<error>"),
@@ -122,6 +143,125 @@ impl Token {
}
}
/// The integer types a numeric constant can be tagged with.
///
/// Every variant carries an explicit `i64` discriminant: the unsigned types
/// take 10 through 13 and the signed types take 20 through 23, each group in
/// order of increasing width.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(i64)]
pub enum ConstantType {
    /// Unsigned 8-bit integer (`u8`).
    U8 = 10,
    /// Unsigned 16-bit integer (`u16`).
    U16 = 11,
    /// Unsigned 32-bit integer (`u32`).
    U32 = 12,
    /// Unsigned 64-bit integer (`u64`).
    U64 = 13,
    /// Signed 8-bit integer (`i8`).
    I8 = 20,
    /// Signed 16-bit integer (`i16`).
    I16 = 21,
    /// Signed 32-bit integer (`i32`).
    I32 = 22,
    /// Signed 64-bit integer (`i64`).
    I64 = 23,
}
/// Converts a [`ConstantType`] into the Cranelift IR type of the same width.
///
/// The signed and unsigned variants of a given width both map to the same
/// Cranelift type.
impl From<ConstantType> for cranelift_codegen::ir::Type {
    fn from(value: ConstantType) -> Self {
        match value {
            ConstantType::U8 | ConstantType::I8 => cranelift_codegen::ir::types::I8,
            ConstantType::U16 | ConstantType::I16 => cranelift_codegen::ir::types::I16,
            ConstantType::U32 | ConstantType::I32 => cranelift_codegen::ir::types::I32,
            ConstantType::U64 | ConstantType::I64 => cranelift_codegen::ir::types::I64,
        }
    }
}
impl ConstantType {
/// Returns true if the given type is (a) numeric and (b) signed;
pub fn is_signed(&self) -> bool {
matches!(
self,
ConstantType::I8 | ConstantType::I16 | ConstantType::I32 | ConstantType::I64
)
}
/// Return the set of types that can be safely casted into this type.
pub fn safe_casts_to(self) -> Vec<ConstantType> {
match self {
ConstantType::I8 => vec![ConstantType::I8],
ConstantType::I16 => vec![ConstantType::I16, ConstantType::I8, ConstantType::U8],
ConstantType::I32 => vec![
ConstantType::I32,
ConstantType::I16,
ConstantType::I8,
ConstantType::U16,
ConstantType::U8,
],
ConstantType::I64 => vec![
ConstantType::I64,
ConstantType::I32,
ConstantType::I16,
ConstantType::I8,
ConstantType::U32,
ConstantType::U16,
ConstantType::U8,
],
ConstantType::U8 => vec![ConstantType::U8],
ConstantType::U16 => vec![ConstantType::U16, ConstantType::U8],
ConstantType::U32 => vec![ConstantType::U32, ConstantType::U16, ConstantType::U8],
ConstantType::U64 => vec![
ConstantType::U64,
ConstantType::U32,
ConstantType::U16,
ConstantType::U8,
],
}
}
/// Return the set of all currently-available constant types
pub fn all_types() -> Vec<Self> {
vec![
ConstantType::U8,
ConstantType::U16,
ConstantType::U32,
ConstantType::U64,
ConstantType::I8,
ConstantType::I16,
ConstantType::I32,
ConstantType::I64,
]
}
/// Return the name of the given type, as a string
pub fn name(&self) -> String {
match self {
ConstantType::I8 => "i8".to_string(),
ConstantType::I16 => "i16".to_string(),
ConstantType::I32 => "i32".to_string(),
ConstantType::I64 => "i64".to_string(),
ConstantType::U8 => "u8".to_string(),
ConstantType::U16 => "u16".to_string(),
ConstantType::U32 => "u32".to_string(),
ConstantType::U64 => "u64".to_string(),
}
}
}
#[derive(Debug, Error, PartialEq)]
pub enum InvalidConstantType {
#[error("Unrecognized constant {0} for constant type")]
Value(i64),
}
/// Recovers a [`ConstantType`] from its integer code.
impl TryFrom<i64> for ConstantType {
    type Error = InvalidConstantType;

    /// Returns the variant whose discriminant equals `value`, or
    /// [`InvalidConstantType::Value`] carrying `value` when no variant
    /// matches.
    fn try_from(value: i64) -> Result<Self, Self::Error> {
        // Comparing against each variant's own discriminant keeps this
        // lookup tied to the numbering declared on the enum.
        const CANDIDATES: [ConstantType; 8] = [
            ConstantType::U8,
            ConstantType::U16,
            ConstantType::U32,
            ConstantType::U64,
            ConstantType::I8,
            ConstantType::I16,
            ConstantType::I32,
            ConstantType::I64,
        ];

        CANDIDATES
            .iter()
            .copied()
            .find(|candidate| *candidate as i64 == value)
            .ok_or(InvalidConstantType::Value(value))
    }
}
/// Parse a number in the given base, return a pair of the base and the
/// parsed number. This is just a helper used for all of the number
/// regular expression cases, which kicks off to the obvious Rust
@@ -129,24 +269,66 @@ impl Token {
fn parse_number(
base: Option<u8>,
value: &Lexer<Token>,
) -> Result<(Option<u8>, i64), ParseIntError> {
) -> Result<(Option<u8>, Option<ConstantType>, u64), ParseIntError> {
// With no explicit base the text is read as decimal; with one, the first
// two characters of the matched slice (the base prefix) are dropped.
let (radix, strval) = match base {
None => (10, value.slice()),
Some(radix) => (radix, &value.slice()[2..]),
};
let intval = i64::from_str_radix(strval, radix as u32)?;
Ok((base, intval))
// Split off a trailing type suffix, if one is present, so that `strval`
// holds only the digits and `declared_type` records the suffix that was seen.
let (declared_type, strval) = if let Some(strval) = strval.strip_suffix("u8") {
(Some(ConstantType::U8), strval)
} else if let Some(strval) = strval.strip_suffix("u16") {
(Some(ConstantType::U16), strval)
} else if let Some(strval) = strval.strip_suffix("u32") {
(Some(ConstantType::U32), strval)
} else if let Some(strval) = strval.strip_suffix("u64") {
(Some(ConstantType::U64), strval)
} else if let Some(strval) = strval.strip_suffix("i8") {
(Some(ConstantType::I8), strval)
} else if let Some(strval) = strval.strip_suffix("i16") {
(Some(ConstantType::I16), strval)
} else if let Some(strval) = strval.strip_suffix("i32") {
(Some(ConstantType::I32), strval)
} else if let Some(strval) = strval.strip_suffix("i64") {
(Some(ConstantType::I64), strval)
} else {
(None, strval)
};
// Parse the remaining digits in the chosen radix; a failure is returned to
// the caller as a `ParseIntError` via `?`.
let intval = u64::from_str_radix(strval, radix as u32)?;
Ok((base, declared_type, intval))
}
/// Renders an optional type tag as the suffix text used on numeric literals.
///
/// A declared type yields its lowercase name (such as `"u8"` or `"i64"`);
/// `None` yields the empty string.
fn display_optional_type(otype: &Option<ConstantType>) -> &'static str {
    match otype {
        Some(declared) => match declared {
            ConstantType::U8 => "u8",
            ConstantType::U16 => "u16",
            ConstantType::U32 => "u32",
            ConstantType::U64 => "u64",
            ConstantType::I8 => "i8",
            ConstantType::I16 => "i16",
            ConstantType::I32 => "i32",
            ConstantType::I64 => "i64",
        },
        None => "",
    }
}
// Lexes one literal per supported base notation, plus type-suffixed literals,
// and checks the `Token::Number` produced for each.
#[test]
fn lex_numbers() {
let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9");
assert_eq!(lex0.next(), Some(Token::Number((None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12))));
let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc 12u8 0xci64// 9");
assert_eq!(lex0.next(), Some(Token::Number((None, None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(2), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(8), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(10), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(16), None, 12))));
// A type suffix is reported separately from the base and the value.
assert_eq!(
lex0.next(),
Some(Token::Number((None, Some(ConstantType::U8), 12)))
);
assert_eq!(
lex0.next(),
Some(Token::Number((Some(16), Some(ConstantType::I64), 12)))
);
// No token is expected for the trailing "// 9" (presumably skipped as a
// comment by a lexer rule outside this view).
assert_eq!(lex0.next(), None);
}
@@ -168,6 +350,31 @@ fn lexer_spans() {
assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
assert_eq!(lex0.next(), Some((Token::var("x"), 4..5)));
assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
assert_eq!(lex0.next(), Some((Token::Number((None, None, 1)), 8..9)));
assert_eq!(lex0.next(), None);
}
/// Checks the span reported for each token of a short three-statement
/// program that contains type-suffixed literals and a `-` operator placed
/// directly before a variable.
#[test]
fn further_spans() {
    let source = "x = 2i64 + 2i64;\ny = -x;\nprint y;";
    let two_i64 = || Token::Number((None, Some(ConstantType::I64), 2));

    // Every expected token, in order, paired with the span it should report.
    let expected = vec![
        (Token::var("x"), 0..1),
        (Token::Equals, 2..3),
        (two_i64(), 4..8),
        (Token::Operator('+'), 9..10),
        (two_i64(), 11..15),
        (Token::Semi, 15..16),
        (Token::var("y"), 17..18),
        (Token::Equals, 19..20),
        (Token::Operator('-'), 21..22),
        (Token::var("x"), 22..23),
        (Token::Semi, 23..24),
        (Token::Print, 25..30),
        (Token::var("y"), 31..32),
        (Token::Semi, 32..33),
    ];

    let mut spanned = Token::lexer(source).spanned();
    for want in expected {
        assert_eq!(spanned.next(), Some(want));
    }
}