🤔 Add a type inference engine, along with typed literals. (#4)

The typed literal formatting mirrors that of Rust. If no type can be inferred for an untagged literal, the type inference engine will warn the user and then assume that they meant an unsigned 64-bit number. (This is slightly inconvenient, because there can be cases in which our Arbitrary instance may generate a unary negation, in which case we should assume that it's a signed 64-bit number; we may want to revisit this later.) The type inference engine is a standard two-phase one, in which we first generate a series of type constraints, and then we solve those constraints. In this particular implementation, we actually use a third phase to generate a final AST. Finally, to increase the amount of testing performed, I've removed the overflow checking in the evaluator. The only thing we now check for is division by zero. This does make things slightly slower in testing, but hopefully we get more coverage this way.
2023-09-19 20:40:05 -07:00
parent 1fbfd0c2d2
commit bd3b9af469
44 changed files with 3258 additions and 702 deletions
--- a/src/syntax/parser.lalrpop
+++ b/src/syntax/parser.lalrpop
@@ -9,8 +9,8 @@
 //! eventually want to leave lalrpop behind.)
 //!
 use crate::syntax::{LexerError, Location};
-use crate::syntax::ast::{Program,Statement,Expression,Value};
-use crate::syntax::tokens::Token;
+use crate::syntax::ast::{Program,Statement,Expression,Value,Name};
+use crate::syntax::tokens::{ConstantType, Token};
 use internment::ArcIntern;

 // one cool thing about lalrpop: we can pass arguments. in this case, the
@@ -32,6 +32,8 @@ extern {
        ";" => Token::Semi,
        "(" => Token::LeftParen,
        ")" => Token::RightParen,
+        "<" => Token::LessThan,
+        ">" => Token::GreaterThan,

        "print" => Token::Print,

@@ -44,7 +46,7 @@ extern {
        // to name and use "their value", you get their source location.
        // For these, we want "their value" to be their actual contents,
        // which is why we put their types in angle brackets.
-        "<num>" => Token::Number((<Option<u8>>,<i64>)),
+        "<num>" => Token::Number((<Option<u8>>,<Option<ConstantType>>,<u64>)),
        "<var>" => Token::Variable(<ArcIntern<String>>),
    }
 }
@@ -89,10 +91,19 @@ pub Statement: Statement = {
    // A statement can be a variable binding. Note, here, that we use this
    // funny @L thing to get the source location before the variable, so that
    // we can say that this statement spans across everything.
-    <l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e),
+    <ls: @L> <v:"<var>"> <var_end: @L> "=" <e:Expression> ";" <le: @L> =>
+        Statement::Binding(
+            Location::new(file_idx, ls..le),
+            Name::new(v, Location::new(file_idx, ls..var_end)),
+            e,
+        ),

    // Alternatively, a statement can just be a print statement.
-    "print" <l:@L> <v:"<var>"> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()),
+    <ls: @L> "print" <name_start: @L> <v:"<var>"> <name_end: @L> ";" <le: @L> =>
+        Statement::Print(
+            Location::new(file_idx, ls..le),
+            Name::new(v, Location::new(file_idx, name_start..name_end)),
+        ),
 }

 // Expressions! Expressions are a little fiddly, because we're going to
@@ -124,15 +135,27 @@ Expression: Expression = {

 // we group addition and subtraction under the heading "additive"
 AdditiveExpression: Expression = {
-    <e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]),
-    <e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]),
+    <ls: @L> <e1:AdditiveExpression> <l: @L> "+" <e2:MultiplicativeExpression> <le: @L> =>
+        Expression::Primitive(Location::new(file_idx, ls..le), "+".to_string(), vec![e1, e2]),
+    <ls: @L> <e1:AdditiveExpression> <l: @L> "-" <e2:MultiplicativeExpression> <le: @L> =>
+        Expression::Primitive(Location::new(file_idx, ls..le), "-".to_string(), vec![e1, e2]),
    MultiplicativeExpression,
 }

 // similarly, we group multiplication and division under "multiplicative"
 MultiplicativeExpression: Expression = {
-    <e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]),
-    <e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]),
+    <ls: @L> <e1:MultiplicativeExpression> <l: @L> "*" <e2:UnaryExpression> <le: @L> => 
+        Expression::Primitive(Location::new(file_idx, ls..le), "*".to_string(), vec![e1, e2]),
+    <ls: @L> <e1:MultiplicativeExpression> <l: @L> "/" <e2:UnaryExpression> <le: @L> =>
+        Expression::Primitive(Location::new(file_idx, ls..le), "/".to_string(), vec![e1, e2]),
+    UnaryExpression,
+}
+
+UnaryExpression: Expression = {
+    <l: @L> "-" <e:UnaryExpression> <le: @L> =>
+       Expression::Primitive(Location::new(file_idx, l..le), "-".to_string(), vec![e]),
+    <l: @L> "<" <v:"<var>"> ">" <e:UnaryExpression> <le: @L> =>
+       Expression::Cast(Location::new(file_idx, l..le), v.to_string(), Box::new(e)),
    AtomicExpression,
 }

@@ -140,22 +163,9 @@ MultiplicativeExpression: Expression = {
 // they cannot be further divided into parts
 AtomicExpression: Expression = {
    // just a variable reference
-    <l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
+    <l: @L> <v:"<var>"> <end: @L> => Expression::Reference(Location::new(file_idx, l..end), v.to_string()),
    // just a number
-    <l:@L> <n:"<num>"> => {
-        let val = Value::Number(n.0, n.1);
-        Expression::Value(Location::new(file_idx, l), val)
-    },
-    // a tricky case: also just a number, but using a negative sign. an
-    // alternative way to do this -- and we may do this eventually -- is
-    // to implement a unary negation expression. this has the odd effect
-    // that the user never actually writes down a negative number; they just
-    // write positive numbers which are immediately sent to a negation
-    // primitive!
-    <l:@L> "-" <n:"<num>"> => {
-        let val = Value::Number(n.0, -n.1);
-        Expression::Value(Location::new(file_idx, l), val)
-    },
+    <l: @L> <n:"<num>"> <end: @L> => Expression::Value(Location::new(file_idx, l..end), Value::Number(n.0, n.1, n.2)),
    // finally, let people parenthesize expressions and get back to a
    // lower precedence
    "(" <e:Expression> ")" => e,