🤔 Add a type inference engine, along with typed literals. #4

Merged
acw merged 25 commits from acw/type-checker into develop 2023-09-19 20:40:05 -07:00
2 changed files with 94 additions and 27 deletions
Showing only changes of commit c0b65ef416 - Show all commits

View File

@@ -10,7 +10,7 @@
//! //!
use crate::syntax::{LexerError, Location}; use crate::syntax::{LexerError, Location};
use crate::syntax::ast::{Program,Statement,Expression,Value}; use crate::syntax::ast::{Program,Statement,Expression,Value};
use crate::syntax::tokens::Token; use crate::syntax::tokens::{ConstantType, Token};
use internment::ArcIntern; use internment::ArcIntern;
// one cool thing about lalrpop: we can pass arguments. in this case, the // one cool thing about lalrpop: we can pass arguments. in this case, the
@@ -44,7 +44,7 @@ extern {
// to name and use "their value", you get their source location. // to name and use "their value", you get their source location.
// For these, we want "their value" to be their actual contents, // For these, we want "their value" to be their actual contents,
// which is why we put their types in angle brackets. // which is why we put their types in angle brackets.
"<num>" => Token::Number((<Option<u8>>,<i64>)), "<num>" => Token::Number((<Option<u8>>,<Option<ConstantType>>,<i64>)),
"<var>" => Token::Variable(<ArcIntern<String>>), "<var>" => Token::Variable(<ArcIntern<String>>),
} }
} }
@@ -143,7 +143,7 @@ AtomicExpression: Expression = {
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()), <l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
// just a number // just a number
<l:@L> <n:"<num>"> => { <l:@L> <n:"<num>"> => {
let val = Value::Number(n.0, n.1); let val = Value::Number(n.0, n.2);
Expression::Value(Location::new(file_idx, l), val) Expression::Value(Location::new(file_idx, l), val)
}, },
// a tricky case: also just a number, but using a negative sign. an // a tricky case: also just a number, but using a negative sign. an
@@ -153,7 +153,7 @@ AtomicExpression: Expression = {
// write positive numbers which are immediately sent to a negation // write positive numbers which are immediately sent to a negation
// primitive! // primitive!
<l:@L> "-" <n:"<num>"> => { <l:@L> "-" <n:"<num>"> => {
let val = Value::Number(n.0, -n.1); let val = Value::Number(n.0, -n.2);
Expression::Value(Location::new(file_idx, l), val) Expression::Value(Location::new(file_idx, l), val)
}, },
// finally, let people parenthesize expressions and get back to a // finally, let people parenthesize expressions and get back to a

View File

@@ -53,13 +53,14 @@ pub enum Token {
/// Numbers capture both the value we read from the input, /// Numbers capture both the value we read from the input,
/// converted to an `i64`, as well as the base the user used /// converted to an `i64`, as well as the base the user used
/// to write the number, if they did so. /// to write the number and/or the type the user specified,
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))] /// if they did either.
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))] #[regex(r"0b[01]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(2), v))]
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))] #[regex(r"0o[0-7]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(8), v))]
#[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))] #[regex(r"0d[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(10), v))]
#[regex(r"[0-9]+", |v| parse_number(None, v))] #[regex(r"0x[0-9a-fA-F]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(16), v))]
Number((Option<u8>, i64)), #[regex(r"[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(None, v))]
Number((Option<u8>, Option<ConstantType>, i64)),
// Variables; this is a very standard, simple set of characters // Variables; this is a very standard, simple set of characters
// for variables, but feel free to experiment with more complicated // for variables, but feel free to experiment with more complicated
@@ -90,13 +91,25 @@ impl fmt::Display for Token {
Token::RightParen => write!(f, "')'"), Token::RightParen => write!(f, "')'"),
Token::Print => write!(f, "'print'"), Token::Print => write!(f, "'print'"),
Token::Operator(c) => write!(f, "'{}'", c), Token::Operator(c) => write!(f, "'{}'", c),
Token::Number((None, v)) => write!(f, "'{}'", v), Token::Number((None, otype, v)) => write!(f, "'{}{}'", v, display_optional_type(otype)),
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v), Token::Number((Some(2), otype, v)) => {
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v), write!(f, "'0b{:b}{}'", v, display_optional_type(otype))
Token::Number((Some(10), v)) => write!(f, "'{}'", v), }
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v), Token::Number((Some(8), otype, v)) => {
Token::Number((Some(b), v)) => { write!(f, "'0o{:o}{}'", v, display_optional_type(otype))
write!(f, "Invalidly-based-number<base={},val={}>", b, v) }
Token::Number((Some(10), otype, v)) => {
write!(f, "'{}{}'", v, display_optional_type(otype))
}
Token::Number((Some(16), otype, v)) => {
write!(f, "'0x{:x}{}'", v, display_optional_type(otype))
}
Token::Number((Some(b), opt_type, v)) => {
write!(
f,
"Invalidly-based-number<base={},val={},opt_type={:?}>",
b, v, opt_type
)
} }
Token::Variable(s) => write!(f, "'{}'", s), Token::Variable(s) => write!(f, "'{}'", s),
Token::Error => write!(f, "<error>"), Token::Error => write!(f, "<error>"),
@@ -122,6 +135,18 @@ impl Token {
} }
} }
/// The type a numeric literal can be annotated with by writing a suffix
/// directly after its digits (for example `12u8`). The lexer carries this
/// around as an `Option<ConstantType>`, with `None` meaning no suffix was
/// written.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConstantType {
// Selected by the `u8`, `u16`, `u32` and `u64` suffixes, respectively.
U8,
U16,
U32,
U64,
// Selected by the `i8`, `i16`, `i32` and `i64` suffixes, respectively.
I8,
I16,
I32,
I64,
}
/// Parse a number in the given base, return a pair of the base and the /// Parse a number in the given base, return a pair of the base and the
/// parsed number. This is just a helper used for all of the number /// parsed number. This is just a helper used for all of the number
/// regular expression cases, which kicks off to the obvious Rust /// regular expression cases, which kicks off to the obvious Rust
@@ -129,24 +154,66 @@ impl Token {
fn parse_number( fn parse_number(
base: Option<u8>, base: Option<u8>,
value: &Lexer<Token>, value: &Lexer<Token>,
) -> Result<(Option<u8>, i64), ParseIntError> { ) -> Result<(Option<u8>, Option<ConstantType>, i64), ParseIntError> {
// With no explicit base the whole matched slice is read as decimal;
// otherwise the first two characters (the `0b` / `0o` / `0d` / `0x`
// prefix) are skipped before parsing.
let (radix, strval) = match base { let (radix, strval) = match base {
None => (10, value.slice()), None => (10, value.slice()),
Some(radix) => (radix, &value.slice()[2..]), Some(radix) => (radix, &value.slice()[2..]),
}; };
// Peel an optional type suffix off the end of the text, remembering which
// type it named. `strval` is shadowed so that only the digits remain for
// the integer parse below.
let (declared_type, strval) = if let Some(strval) = strval.strip_suffix("u8") {
(Some(ConstantType::U8), strval)
} else if let Some(strval) = strval.strip_suffix("u16") {
(Some(ConstantType::U16), strval)
} else if let Some(strval) = strval.strip_suffix("u32") {
(Some(ConstantType::U32), strval)
} else if let Some(strval) = strval.strip_suffix("u64") {
(Some(ConstantType::U64), strval)
} else if let Some(strval) = strval.strip_suffix("i8") {
(Some(ConstantType::I8), strval)
} else if let Some(strval) = strval.strip_suffix("i16") {
(Some(ConstantType::I16), strval)
} else if let Some(strval) = strval.strip_suffix("i32") {
(Some(ConstantType::I32), strval)
} else if let Some(strval) = strval.strip_suffix("i64") {
(Some(ConstantType::I64), strval)
} else {
(None, strval)
};
// NOTE(review): the digits are always parsed as an `i64`, whatever suffix
// was declared, so a `u64` literal larger than `i64::MAX` comes back as a
// `ParseIntError` here -- confirm that this is intended.
let intval = i64::from_str_radix(strval, radix as u32)?; let intval = i64::from_str_radix(strval, radix as u32)?;
// `base` is handed back exactly as the caller supplied it, so `None`
// still distinguishes an unprefixed literal from an explicit `0d` one.
Ok((base, intval)) Ok((base, declared_type, intval))
}
/// Render an optional literal type as the suffix text it is written with
/// (`"u8"`, `"i64"`, ...), or as the empty string when no type was declared.
fn display_optional_type(otype: &Option<ConstantType>) -> &'static str {
match otype {
// No declared type: nothing is appended after the digits.
None => "",
Some(ConstantType::I8) => "i8",
Some(ConstantType::I16) => "i16",
Some(ConstantType::I32) => "i32",
Some(ConstantType::I64) => "i64",
Some(ConstantType::U8) => "u8",
Some(ConstantType::U16) => "u16",
Some(ConstantType::U32) => "u32",
Some(ConstantType::U64) => "u64",
}
} }
// Lexes the value 12 written once per supported base (plus, in the longer
// input, two suffixed literals) and checks the base, declared type and
// value reported for each token.
#[test] #[test]
fn lex_numbers() { fn lex_numbers() {
let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9"); let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc 12u8 0xci64// 9");
// Unprefixed, binary, octal, explicit-decimal and hexadecimal spellings.
assert_eq!(lex0.next(), Some(Token::Number((None, 12)))); assert_eq!(lex0.next(), Some(Token::Number((None, None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(2), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(8), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(10), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12)))); assert_eq!(lex0.next(), Some(Token::Number((Some(16), None, 12))));
// `12u8`: no base prefix, `u8` suffix.
assert_eq!(
lex0.next(),
Some(Token::Number((None, Some(ConstantType::U8), 12)))
);
// `0xci64`: hexadecimal prefix combined with an `i64` suffix.
assert_eq!(
lex0.next(),
Some(Token::Number((Some(16), Some(ConstantType::I64), 12)))
);
// No further token is produced for the trailing `// 9`.
assert_eq!(lex0.next(), None); assert_eq!(lex0.next(), None);
} }
@@ -168,6 +235,6 @@ fn lexer_spans() {
assert_eq!(lex0.next(), Some((Token::Equals, 2..3))); assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
assert_eq!(lex0.next(), Some((Token::var("x"), 4..5))); assert_eq!(lex0.next(), Some((Token::var("x"), 4..5)));
assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7))); assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9))); assert_eq!(lex0.next(), Some((Token::Number((None, None, 1)), 8..9)));
assert_eq!(lex0.next(), None); assert_eq!(lex0.next(), None);
} }