🤔 Add a type inference engine, along with typed literals. #4

Merged
acw merged 25 commits from acw/type-checker into develop 2023-09-19 20:40:05 -07:00
9 changed files with 141 additions and 101 deletions
Showing only changes of commit 833c9d5350 - Show all commits

View File

@@ -134,10 +134,10 @@ impl REPL {
// if this is a variable binding, and we've never defined this variable before, // if this is a variable binding, and we've never defined this variable before,
// we should tell cranelift about it. this is optimistic; if we fail to compile, // we should tell cranelift about it. this is optimistic; if we fail to compile,
// then we won't use this definition until someone tries again. // then we won't use this definition until someone tries again.
if !self.variable_binding_sites.contains_key(&name) { if !self.variable_binding_sites.contains_key(&name.name) {
self.jitter.define_string(&name)?; self.jitter.define_string(&name.name)?;
self.jitter self.jitter
.define_variable(name.clone(), ConstantType::U64)?; .define_variable(name.to_string(), ConstantType::U64)?;
} }
crate::syntax::Program { crate::syntax::Program {

View File

@@ -73,12 +73,12 @@ pub enum ParserError {
/// Raised when we're parsing the file, and run into a token in a /// Raised when we're parsing the file, and run into a token in a
/// place we weren't expecting it. /// place we weren't expecting it.
#[error("Unrecognized token")] #[error("Unrecognized token")]
UnrecognizedToken(Location, Location, Token, Vec<String>), UnrecognizedToken(Location, Token, Vec<String>),
/// Raised when we were expecting the end of the file, but instead /// Raised when we were expecting the end of the file, but instead
/// got another token. /// got another token.
#[error("Extra token")] #[error("Extra token")]
ExtraToken(Location, Token, Location), ExtraToken(Location, Token),
/// Raised when the lexer just had some sort of internal problem /// Raised when the lexer just had some sort of internal problem
/// and just gave up. /// and just gave up.
@@ -106,30 +106,28 @@ impl ParserError {
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self { fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
match err { match err {
ParseError::InvalidToken { location } => { ParseError::InvalidToken { location } => {
ParserError::InvalidToken(Location::new(file_idx, location)) ParserError::InvalidToken(Location::new(file_idx, location..location+1))
} }
ParseError::UnrecognizedEof { location, expected } => { ParseError::UnrecognizedEof { location, expected } => {
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected) ParserError::UnrecognizedEOF(Location::new(file_idx, location..location+1), expected)
} }
ParseError::UnrecognizedToken { ParseError::UnrecognizedToken {
token: (start, token, end), token: (start, token, end),
expected, expected,
} => ParserError::UnrecognizedToken( } => ParserError::UnrecognizedToken(
Location::new(file_idx, start), Location::new(file_idx, start..end),
Location::new(file_idx, end),
token, token,
expected, expected,
), ),
ParseError::ExtraToken { ParseError::ExtraToken {
token: (start, token, end), token: (start, token, end),
} => ParserError::ExtraToken( } => ParserError::ExtraToken(
Location::new(file_idx, start), Location::new(file_idx, start..end),
token, token,
Location::new(file_idx, end),
), ),
ParseError::User { error } => match error { ParseError::User { error } => match error {
LexerError::LexFailure(offset) => { LexerError::LexFailure(offset) => {
ParserError::LexFailure(Location::new(file_idx, offset)) ParserError::LexFailure(Location::new(file_idx, offset..offset+1))
} }
}, },
} }
@@ -180,37 +178,29 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
), ),
// encountered a token where it shouldn't be // encountered a token where it shouldn't be
ParserError::UnrecognizedToken(start, end, token, expected) => { ParserError::UnrecognizedToken(loc, token, expected) => {
let expected_str = let expected_str =
format!("unexpected token {}{}", token, display_expected(expected)); format!("unexpected token {}{}", token, display_expected(expected));
let unexpected_str = format!("unexpected token {}", token); let unexpected_str = format!("unexpected token {}", token);
let labels = start.range_label(end);
Diagnostic::error() Diagnostic::error()
.with_labels(
labels
.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
.with_message(expected_str) .with_message(expected_str)
.with_labels(vec![
loc.primary_label().with_message(unexpected_str)
])
} }
// I think we get this when we get a token, but were expected EOF // I think we get this when we get a token, but were expected EOF
ParserError::ExtraToken(start, token, end) => { ParserError::ExtraToken(loc, token) => {
let expected_str = let expected_str =
format!("unexpected token {} after the expected end of file", token); format!("unexpected token {} after the expected end of file", token);
let unexpected_str = format!("unexpected token {}", token); let unexpected_str = format!("unexpected token {}", token);
let labels = start.range_label(end);
Diagnostic::error() Diagnostic::error()
.with_labels(
labels
.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
.with_message(expected_str) .with_message(expected_str)
.with_labels(vec![
loc.primary_label().with_message(unexpected_str)
])
} }
// simple lexer errors // simple lexer errors
@@ -293,23 +283,23 @@ fn order_of_operations() {
Program::from_str(muladd1).unwrap(), Program::from_str(muladd1).unwrap(),
Program { Program {
statements: vec![Statement::Binding( statements: vec![Statement::Binding(
Location::new(testfile, 0), Location::new(testfile, 0..1),
"x".to_string(), Name::manufactured("x"),
Expression::Primitive( Expression::Primitive(
Location::new(testfile, 6), Location::new(testfile, 6..7),
"+".to_string(), "+".to_string(),
vec![ vec![
Expression::Value(Location::new(testfile, 4), Value::Number(None, None, 1),), Expression::Value(Location::new(testfile, 4..5), Value::Number(None, None, 1),),
Expression::Primitive( Expression::Primitive(
Location::new(testfile, 10), Location::new(testfile, 10..11),
"*".to_string(), "*".to_string(),
vec![ vec![
Expression::Value( Expression::Value(
Location::new(testfile, 8), Location::new(testfile, 8..9),
Value::Number(None, None, 2), Value::Number(None, None, 2),
), ),
Expression::Value( Expression::Value(
Location::new(testfile, 12), Location::new(testfile, 12..13),
Value::Number(None, None, 3), Value::Number(None, None, 3),
), ),
] ]

View File

@@ -1,4 +1,4 @@
use crate::syntax::ast::{ConstantType, Expression, Program, Statement, Value}; use crate::syntax::ast::{ConstantType, Expression, Name, Program, Statement, Value};
use crate::syntax::location::Location; use crate::syntax::location::Location;
use proptest::sample::select; use proptest::sample::select;
use proptest::{ use proptest::{
@@ -10,15 +10,12 @@ use std::collections::HashMap;
const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*"; const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*";
const OPERATORS: &[(&str, usize)] = &[("+", 2), ("-", 1), ("-", 2), ("*", 2), ("/", 2)]; const OPERATORS: &[(&str, usize)] = &[("+", 2), ("-", 1), ("-", 2), ("*", 2), ("/", 2)];
#[derive(Clone, Debug)]
struct Name(String);
impl Arbitrary for Name { impl Arbitrary for Name {
type Parameters = (); type Parameters = ();
type Strategy = BoxedStrategy<Self>; type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
VALID_VARIABLE_NAMES.prop_map(Name).boxed() VALID_VARIABLE_NAMES.prop_map(Name::manufactured).boxed()
} }
} }
@@ -67,12 +64,12 @@ impl Arbitrary for Program {
output_type: Some(psi.binding_type), output_type: Some(psi.binding_type),
}); });
defined_variables.insert(psi.name.0.clone(), psi.binding_type); defined_variables.insert(psi.name.name.clone(), psi.binding_type);
statements.push( statements.push(
expr.prop_map(move |expr| { expr.prop_map(move |expr| {
Statement::Binding( Statement::Binding(
Location::manufactured(), Location::manufactured(),
psi.name.0.clone(), psi.name.clone(),
expr, expr,
) )
}) })
@@ -81,7 +78,7 @@ impl Arbitrary for Program {
} else { } else {
let printers = defined_variables let printers = defined_variables
.keys() .keys()
.map(|n| Just(Statement::Print(Location::manufactured(), n.clone()))); .map(|n| Just(Statement::Print(Location::manufactured(), Name::manufactured(n))));
statements.push(Union::new(printers).boxed()); statements.push(Union::new(printers).boxed());
} }
} }

View File

@@ -1,3 +1,8 @@
use std::fmt;
use std::hash::Hash;
use internment::ArcIntern;
pub use crate::syntax::tokens::ConstantType; pub use crate::syntax::tokens::ConstantType;
use crate::syntax::Location; use crate::syntax::Location;
@@ -14,6 +19,50 @@ pub struct Program {
pub statements: Vec<Statement>, pub statements: Vec<Statement>,
} }
/// A Name.
///
/// This is basically a string, but annotated with the place the string
/// is in the source file.
///
/// The hand-written `PartialEq` and `Hash` implementations for this type
/// look only at `name`; `location` takes no part in comparison or hashing.
#[derive(Clone, Debug)]
pub struct Name {
/// The text of the name.
pub name: String,
/// The source location attached to this name.
pub location: Location,
}
impl Name {
    /// Build a name from anything that can be rendered with `to_string`,
    /// tagging it with the source location supplied by the caller.
    pub fn new<S: ToString>(n: S, location: Location) -> Name {
        let name = n.to_string();
        Self { name, location }
    }

    /// Build a name that does not come from real source text; its location
    /// is whatever `Location::manufactured()` returns.
    pub fn manufactured<S: ToString>(n: S) -> Name {
        let name = n.to_string();
        Self {
            name,
            location: Location::manufactured(),
        }
    }

    /// Consume this name and intern its text. The location is discarded.
    pub fn intern(self) -> ArcIntern<String> {
        let Name { name, .. } = self;
        ArcIntern::new(name)
    }
}
/// Two names are equal when their text matches; the attached source
/// location is deliberately left out of the comparison.
impl PartialEq for Name {
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (self.name.as_str(), other.name.as_str());
        lhs == rhs
    }
}
// Marker impl: equality on `Name` compares only the `name` string (see the
// `PartialEq` impl), so it is a full equivalence relation.
impl Eq for Name {}
/// Hashes only the text of the name, mirroring the equality rule: names that
/// differ only in source location produce the same hash.
impl Hash for Name {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        Hash::hash(&self.name, state)
    }
}
impl fmt::Display for Name {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.name.fmt(f)
}
}
/// A parsed statement. /// A parsed statement.
/// ///
/// Statements are guaranteed to be syntactically valid, but may be /// Statements are guaranteed to be syntactically valid, but may be
@@ -27,8 +76,8 @@ pub struct Program {
/// thing, not if they are the exact same statement. /// thing, not if they are the exact same statement.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum Statement { pub enum Statement {
Binding(Location, String, Expression), Binding(Location, Name, Expression),
Print(Location, String), Print(Location, Name),
} }
impl PartialEq for Statement { impl PartialEq for Statement {

View File

@@ -25,11 +25,11 @@ impl Program {
match stmt { match stmt {
Statement::Binding(_, name, value) => { Statement::Binding(_, name, value) => {
let actual_value = value.eval(&env)?; let actual_value = value.eval(&env)?;
env = env.extend(ArcIntern::new(name.clone()), actual_value); env = env.extend(name.clone().intern(), actual_value);
} }
Statement::Print(_, name) => { Statement::Print(_, name) => {
let value = env.lookup(ArcIntern::new(name.clone()))?; let value = env.lookup(name.clone().intern())?;
let line = format!("{} = {}\n", name, value); let line = format!("{} = {}\n", name, value);
stdout.push_str(&line); stdout.push_str(&line);
} }

View File

@@ -1,3 +1,5 @@
use std::ops::Range;
use codespan_reporting::diagnostic::{Diagnostic, Label}; use codespan_reporting::diagnostic::{Diagnostic, Label};
/// A source location, for use in pointing users towards warnings and errors. /// A source location, for use in pointing users towards warnings and errors.
@@ -7,7 +9,7 @@ use codespan_reporting::diagnostic::{Diagnostic, Label};
#[derive(Clone, Debug, Eq, Hash, PartialEq)] #[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Location { pub struct Location {
file_idx: usize, file_idx: usize,
offset: usize, location: Range<usize>,
} }
impl Location { impl Location {
@@ -17,8 +19,8 @@ impl Location {
/// The file index is based on the file database being used. See the /// The file index is based on the file database being used. See the
/// `codespan_reporting::files::SimpleFiles::add` function, which is /// `codespan_reporting::files::SimpleFiles::add` function, which is
/// normally where we get this index. /// normally where we get this index.
pub fn new(file_idx: usize, offset: usize) -> Self { pub fn new(file_idx: usize, location: Range<usize>) -> Self {
Location { file_idx, offset } Location { file_idx, location }
} }
/// Generate a `Location` for a completely manufactured bit of code. /// Generate a `Location` for a completely manufactured bit of code.
@@ -30,7 +32,7 @@ impl Location {
pub fn manufactured() -> Self { pub fn manufactured() -> Self {
Location { Location {
file_idx: 0, file_idx: 0,
offset: 0, location: 0..0,
} }
} }
@@ -47,7 +49,7 @@ impl Location {
/// actually happened), but you'd probably want to make the first location /// actually happened), but you'd probably want to make the first location
/// the secondary label to help users find it. /// the secondary label to help users find it.
pub fn primary_label(&self) -> Label<usize> { pub fn primary_label(&self) -> Label<usize> {
Label::primary(self.file_idx, self.offset..self.offset) Label::primary(self.file_idx, self.location.clone())
} }
/// Generate a secondary label for a [`Diagnostic`], based on this source /// Generate a secondary label for a [`Diagnostic`], based on this source
@@ -64,35 +66,7 @@ impl Location {
/// probably want to make the first location the secondary label to help /// probably want to make the first location the secondary label to help
/// users find it. /// users find it.
pub fn secondary_label(&self) -> Label<usize> { pub fn secondary_label(&self) -> Label<usize> {
Label::secondary(self.file_idx, self.offset..self.offset) Label::secondary(self.file_idx, self.location.clone())
}
/// Given this location and another, generate a primary label that
/// specifies the area between those two locations.
///
/// See [`Self::primary_label`] for some discussion of primary versus
/// secondary labels. If the two locations are the same, this method does
/// the exact same thing as [`Self::primary_label`]. If this item was
/// generated by [`Self::manufactured`], it will act as if you'd called
/// `primary_label` on the argument. Otherwise, it will generate the obvious
/// span.
///
/// This function will return `None` only in the case that you provide
/// labels from two different files, which it cannot sensibly handle.
pub fn range_label(&self, end: &Location) -> Option<Label<usize>> {
if self.file_idx == 0 {
return Some(end.primary_label());
}
if self.file_idx != end.file_idx {
return None;
}
if self.offset > end.offset {
Some(Label::primary(self.file_idx, end.offset..self.offset))
} else {
Some(Label::primary(self.file_idx, self.offset..end.offset))
}
} }
/// Return an error diagnostic centered at this location. /// Return an error diagnostic centered at this location.
@@ -104,7 +78,7 @@ impl Location {
pub fn error(&self) -> Diagnostic<usize> { pub fn error(&self) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary( Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx, self.file_idx,
self.offset..self.offset, self.location.clone(),
)]) )])
} }
@@ -117,8 +91,23 @@ impl Location {
pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> { pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary( Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx, self.file_idx,
self.offset..self.offset, self.location.clone(),
) )
.with_message(msg)]) .with_message(msg)])
} }
/// Merge two locations into a single location spanning the whole range between
/// them.
///
/// The result starts at the smaller of the two start offsets and ends at the
/// larger of the two end offsets, so it also covers any gap between the two
/// inputs. The order of the arguments does not matter.
///
/// This function returns None if the locations are from different files; this
/// can happen if one of the locations is manufactured, for example.
///
/// NOTE(review): `Self::manufactured` uses file index 0, so a manufactured
/// location is only rejected here when the other location is not in file 0 --
/// confirm whether file 0 can be a real file.
pub fn merge(&self, other: &Self) -> Option<Self> {
    if self.file_idx != other.file_idx {
        return None;
    }

    let start = self.location.start.min(other.location.start);
    let end = self.location.end.max(other.location.end);

    Some(Location {
        file_idx: self.file_idx,
        location: start..end,
    })
}
} }

View File

@@ -9,7 +9,7 @@
//! eventually want to leave lalrpop behind.) //! eventually want to leave lalrpop behind.)
//! //!
use crate::syntax::{LexerError, Location}; use crate::syntax::{LexerError, Location};
use crate::syntax::ast::{Program,Statement,Expression,Value}; use crate::syntax::ast::{Program,Statement,Expression,Value,Name};
use crate::syntax::tokens::{ConstantType, Token}; use crate::syntax::tokens::{ConstantType, Token};
use internment::ArcIntern; use internment::ArcIntern;
@@ -91,10 +91,19 @@ pub Statement: Statement = {
// A statement can be a variable binding. Note, here, that we use this // A statement can be a variable binding. Note, here, that we use this
// funny @L thing to get the source location before the variable, so that // funny @L thing to get the source location before the variable, so that
// we can say that this statement spans across everything. // we can say that this statement spans across everything.
<l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e), <ls: @L> <v:"<var>"> <var_end: @L> "=" <e:Expression> ";" <le: @L> =>
Statement::Binding(
Location::new(file_idx, ls..le),
Name::new(v, Location::new(file_idx, ls..var_end)),
e,
),
// Alternatively, a statement can just be a print statement. // Alternatively, a statement can just be a print statement.
"print" <l:@L> <v:"<var>"> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()), <ls: @L> "print" <name_start: @L> <v:"<var>"> <name_end: @L> ";" <le: @L> =>
Statement::Print(
Location::new(file_idx, ls..le),
Name::new(v, Location::new(file_idx, name_start..name_end)),
),
} }
// Expressions! Expressions are a little fiddly, because we're going to // Expressions! Expressions are a little fiddly, because we're going to
@@ -126,21 +135,27 @@ Expression: Expression = {
// we group addition and subtraction under the heading "additive" // we group addition and subtraction under the heading "additive"
AdditiveExpression: Expression = { AdditiveExpression: Expression = {
<e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]), <ls: @L> <e1:AdditiveExpression> <l: @L> "+" <e2:MultiplicativeExpression> <le: @L> =>
<e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]), Expression::Primitive(Location::new(file_idx, ls..le), "+".to_string(), vec![e1, e2]),
<ls: @L> <e1:AdditiveExpression> <l: @L> "-" <e2:MultiplicativeExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "-".to_string(), vec![e1, e2]),
MultiplicativeExpression, MultiplicativeExpression,
} }
// similarly, we group multiplication and division under "multiplicative" // similarly, we group multiplication and division under "multiplicative"
MultiplicativeExpression: Expression = { MultiplicativeExpression: Expression = {
<e1:MultiplicativeExpression> <l:@L> "*" <e2:UnaryExpression> => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]), <ls: @L> <e1:MultiplicativeExpression> <l: @L> "*" <e2:UnaryExpression> <le: @L> =>
<e1:MultiplicativeExpression> <l:@L> "/" <e2:UnaryExpression> => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]), Expression::Primitive(Location::new(file_idx, ls..le), "*".to_string(), vec![e1, e2]),
<ls: @L> <e1:MultiplicativeExpression> <l: @L> "/" <e2:UnaryExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "/".to_string(), vec![e1, e2]),
UnaryExpression, UnaryExpression,
} }
UnaryExpression: Expression = { UnaryExpression: Expression = {
<l:@L> "-" <e:UnaryExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e]), <l: @L> "-" <e:UnaryExpression> <le: @L> =>
<l:@L> "<" <v:"<var>"> ">" <e:UnaryExpression> => Expression::Cast(Location::new(file_idx, l), v.to_string(), Box::new(e)), Expression::Primitive(Location::new(file_idx, l..le), "-".to_string(), vec![e]),
<l: @L> "<" <v:"<var>"> ">" <e:UnaryExpression> <le: @L> =>
Expression::Cast(Location::new(file_idx, l..le), v.to_string(), Box::new(e)),
AtomicExpression, AtomicExpression,
} }
@@ -148,9 +163,9 @@ UnaryExpression: Expression = {
// they cannot be further divided into parts // they cannot be further divided into parts
AtomicExpression: Expression = { AtomicExpression: Expression = {
// just a variable reference // just a variable reference
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()), <l: @L> <v:"<var>"> <end: @L> => Expression::Reference(Location::new(file_idx, l..end), v.to_string()),
// just a number // just a number
<l:@L> <n:"<num>"> => Expression::Value(Location::new(file_idx, l), Value::Number(n.0, n.1, n.2)), <l: @L> <n:"<num>"> <end: @L> => Expression::Value(Location::new(file_idx, l..end), Value::Number(n.0, n.1, n.2)),
// finally, let people parenthesize expressions and get back to a // finally, let people parenthesize expressions and get back to a
// lower precedence // lower precedence
"(" <e:Expression> ")" => e, "(" <e:Expression> ")" => e,

View File

@@ -117,20 +117,20 @@ impl Statement {
errors.append(&mut exp_errors); errors.append(&mut exp_errors);
warnings.append(&mut exp_warnings); warnings.append(&mut exp_warnings);
if let Some(original_binding_site) = bound_variables.get(var) { if let Some(original_binding_site) = bound_variables.get(&var.name) {
warnings.push(Warning::ShadowedVariable( warnings.push(Warning::ShadowedVariable(
original_binding_site.clone(), original_binding_site.clone(),
loc.clone(), loc.clone(),
var.clone(), var.to_string(),
)); ));
} else { } else {
bound_variables.insert(var.clone(), loc.clone()); bound_variables.insert(var.to_string(), loc.clone());
} }
} }
Statement::Print(_, var) if bound_variables.contains_key(var) => {} Statement::Print(_, var) if bound_variables.contains_key(&var.name) => {}
Statement::Print(loc, var) => { Statement::Print(loc, var) => {
errors.push(Error::UnboundVariable(loc.clone(), var.clone())) errors.push(Error::UnboundVariable(loc.clone(), var.to_string()))
} }
} }

View File

@@ -53,7 +53,7 @@ fn convert_statement(
) -> Vec<ir::Statement> { ) -> Vec<ir::Statement> {
match statement { match statement {
syntax::Statement::Print(loc, name) => { syntax::Statement::Print(loc, name) => {
let iname = ArcIntern::new(name); let iname = ArcIntern::new(name.to_string());
let final_name = renames let final_name = renames
.get(&iname) .get(&iname)
.map(Clone::clone) .map(Clone::clone)
@@ -71,7 +71,7 @@ fn convert_statement(
syntax::Statement::Binding(loc, name, expr) => { syntax::Statement::Binding(loc, name, expr) => {
let (mut prereqs, expr, ty) = let (mut prereqs, expr, ty) =
convert_expression(expr, constraint_db, renames, bindings); convert_expression(expr, constraint_db, renames, bindings);
let iname = ArcIntern::new(name); let iname = ArcIntern::new(name.to_string());
let final_name = if bindings.contains_key(&iname) { let final_name = if bindings.contains_key(&iname) {
let new_name = ir::gensym(iname.as_str()); let new_name = ir::gensym(iname.as_str());
renames.insert(iname, new_name.clone()); renames.insert(iname, new_name.clone());