🤔 Add a type inference engine, along with typed literals. (#4)

The typed literal formatting mirrors that of Rust. If no type can be
inferred for an untagged literal, the type inference engine will warn
the user and then assume that they meant an unsigned 64-bit number.
(This is slightly inconvenient, because there can be cases in which our
Arbitrary instance may generate a unary negation, in which we should
assume that it's a signed 64-bit number; we may want to revisit this
later.)

The type inference engine is a standard two phase one, in which we first
generate a series of type constraints, and then we solve those
constraints. In this particular implementation, we actually use a third
phase to generate a final AST.

Finally, to increase the amount of testing performed, I've removed the
overflow checking in the evaluator. The only thing we now check for is
division by zero. This does make things a trace slower in testing, but
hopefully we get more coverage this way.
This commit was merged in pull request #4.
This commit is contained in:
2023-09-19 20:40:05 -07:00
committed by GitHub
parent 1fbfd0c2d2
commit bd3b9af469
44 changed files with 3258 additions and 702 deletions

View File

@@ -1,136 +1,189 @@
use std::collections::HashSet;
use crate::syntax::ast::{Expression, Program, Statement, Value};
use crate::syntax::ast::{ConstantType, Expression, Name, Program, Statement, Value};
use crate::syntax::location::Location;
use proptest::sample::select;
use proptest::{
prelude::{Arbitrary, BoxedStrategy, Strategy},
strategy::{Just, Union},
};
use std::collections::HashMap;
use std::ops::Range;
const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*";
#[derive(Debug)]
struct Name(String);
impl ConstantType {
fn get_operators(&self) -> &'static [(&'static str, usize)] {
match self {
ConstantType::I8 | ConstantType::I16 | ConstantType::I32 | ConstantType::I64 => {
&[("+", 2), ("-", 1), ("-", 2), ("*", 2), ("/", 2)]
}
ConstantType::U8 | ConstantType::U16 | ConstantType::U32 | ConstantType::U64 => {
&[("+", 2), ("-", 2), ("*", 2), ("/", 2)]
}
}
}
}
#[derive(Clone)]
pub struct GenerationEnvironment {
allow_inference: bool,
block_length: Range<usize>,
bindings: HashMap<Name, ConstantType>,
return_type: ConstantType,
}
impl Default for GenerationEnvironment {
fn default() -> Self {
GenerationEnvironment {
allow_inference: true,
block_length: 2..10,
bindings: HashMap::new(),
return_type: ConstantType::U64,
}
}
}
impl GenerationEnvironment {
pub fn new(allow_inference: bool) -> Self {
GenerationEnvironment {
allow_inference,
..Default::default()
}
}
}
impl Arbitrary for Program {
type Parameters = GenerationEnvironment;
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(genenv: Self::Parameters) -> Self::Strategy {
proptest::collection::vec(
ProgramStatementInfo::arbitrary(),
genenv.block_length.clone(),
)
.prop_flat_map(move |mut items| {
let mut statements = Vec::new();
let mut genenv = genenv.clone();
for psi in items.drain(..) {
if genenv.bindings.is_empty() || psi.should_be_binding {
genenv.return_type = psi.binding_type;
let expr = Expression::arbitrary_with(genenv.clone());
genenv.bindings.insert(psi.name.clone(), psi.binding_type);
statements.push(
expr.prop_map(move |expr| {
Statement::Binding(Location::manufactured(), psi.name.clone(), expr)
})
.boxed(),
);
} else {
let printers = genenv.bindings.keys().map(|n| {
Just(Statement::Print(
Location::manufactured(),
Name::manufactured(n),
))
});
statements.push(Union::new(printers).boxed());
}
}
statements
.prop_map(|statements| Program { statements })
.boxed()
})
.boxed()
}
}
impl Arbitrary for Name {
type Parameters = ();
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
VALID_VARIABLE_NAMES.prop_map(Name).boxed()
VALID_VARIABLE_NAMES.prop_map(Name::manufactured).boxed()
}
}
impl Arbitrary for Program {
#[derive(Debug)]
struct ProgramStatementInfo {
should_be_binding: bool,
name: Name,
binding_type: ConstantType,
}
impl Arbitrary for ProgramStatementInfo {
type Parameters = ();
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
let optionals = Vec::<Option<Name>>::arbitrary();
optionals
.prop_flat_map(|mut possible_names| {
let mut statements = Vec::new();
let mut defined_variables: HashSet<String> = HashSet::new();
for possible_name in possible_names.drain(..) {
match possible_name {
None if defined_variables.is_empty() => continue,
None => statements.push(
Union::new(defined_variables.iter().map(|name| {
Just(Statement::Print(Location::manufactured(), name.to_string()))
}))
.boxed(),
),
Some(new_name) => {
let closures_name = new_name.0.clone();
let retval =
Expression::arbitrary_with(Some(defined_variables.clone()))
.prop_map(move |exp| {
Statement::Binding(
Location::manufactured(),
closures_name.clone(),
exp,
)
})
.boxed();
defined_variables.insert(new_name.0);
statements.push(retval);
}
}
}
statements
})
.prop_map(|statements| Program { statements })
fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
(
Union::new(vec![Just(true), Just(true), Just(false)]),
Name::arbitrary(),
ConstantType::arbitrary(),
)
.prop_map(
|(should_be_binding, name, binding_type)| ProgramStatementInfo {
should_be_binding,
name,
binding_type,
},
)
.boxed()
}
}
impl Arbitrary for Statement {
type Parameters = Option<HashSet<String>>;
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
let duplicated_args = args.clone();
let defined_variables = args.unwrap_or_default();
let binding_strategy = (
VALID_VARIABLE_NAMES,
Expression::arbitrary_with(duplicated_args),
)
.prop_map(|(name, exp)| Statement::Binding(Location::manufactured(), name, exp))
.boxed();
if defined_variables.is_empty() {
binding_strategy
} else {
let print_strategy = Union::new(
defined_variables
.iter()
.map(|x| Just(Statement::Print(Location::manufactured(), x.to_string()))),
)
.boxed();
Union::new([binding_strategy, print_strategy]).boxed()
}
}
}
impl Arbitrary for Expression {
type Parameters = Option<HashSet<String>>;
type Parameters = GenerationEnvironment;
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
let defined_variables = args.unwrap_or_default();
let value_strategy = Value::arbitrary()
.prop_map(move |x| Expression::Value(Location::manufactured(), x))
fn arbitrary_with(genenv: Self::Parameters) -> Self::Strategy {
// Value(Location, Value). These are the easiest variations to create, because we can always
// create one.
let value_strategy = Value::arbitrary_with(genenv.clone())
.prop_map(|x| Expression::Value(Location::manufactured(), x))
.boxed();
let leaf_strategy = if defined_variables.is_empty() {
// Reference(Location, String), These are slightly trickier, because we can end up in a situation
// where either no variables are defined, or where none of the defined variables have a type we
// can work with. So what we're going to do is combine this one with the previous one as a "leaf
// strategy" -- our non-recursive items -- if we can, or just set that to be the value strategy
// if we can't actually create an references.
let mut bound_variables_of_type = genenv
.bindings
.iter()
.filter(|(_, v)| genenv.return_type == **v)
.map(|(n, _)| n)
.collect::<Vec<_>>();
let leaf_strategy = if bound_variables_of_type.is_empty() {
value_strategy
} else {
let reference_strategy = Union::new(defined_variables.iter().map(|x| {
Just(Expression::Reference(
Location::manufactured(),
x.to_owned(),
))
}))
.boxed();
Union::new([value_strategy, reference_strategy]).boxed()
let mut strats = bound_variables_of_type
.drain(..)
.map(|x| {
Just(Expression::Reference(
Location::manufactured(),
x.name.clone(),
))
.boxed()
})
.collect::<Vec<_>>();
strats.push(value_strategy);
Union::new(strats).boxed()
};
// now we generate our recursive types, given our leaf strategy
leaf_strategy
.prop_recursive(3, 64, 2, move |inner| {
.prop_recursive(3, 10, 2, move |strat| {
(
select(super::BINARY_OPERATORS),
proptest::collection::vec(inner, 2),
select(genenv.return_type.get_operators()),
strat.clone(),
strat,
)
.prop_map(move |(operator, exprs)| {
Expression::Primitive(Location::manufactured(), operator.to_string(), exprs)
.prop_map(|((oper, count), left, right)| {
let mut args = vec![left, right];
while args.len() > count {
args.pop();
}
Expression::Primitive(Location::manufactured(), oper.to_string(), args)
})
})
.boxed()
@@ -138,22 +191,57 @@ impl Arbitrary for Expression {
}
impl Arbitrary for Value {
type Parameters = ();
type Parameters = GenerationEnvironment;
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
let base_strategy = Union::new([
fn arbitrary_with(genenv: Self::Parameters) -> Self::Strategy {
let printed_base_strategy = Union::new([
Just(None::<u8>),
Just(Some(2)),
Just(Some(8)),
Just(Some(10)),
Just(Some(16)),
]);
let value_strategy = u64::arbitrary();
let value_strategy = i64::arbitrary();
(base_strategy, value_strategy)
.prop_map(move |(base, value)| Value::Number(base, value))
(printed_base_strategy, bool::arbitrary(), value_strategy)
.prop_map(move |(base, declare_type, value)| {
let converted_value = match genenv.return_type {
ConstantType::I8 => value % (i8::MAX as u64),
ConstantType::U8 => value % (u8::MAX as u64),
ConstantType::I16 => value % (i16::MAX as u64),
ConstantType::U16 => value % (u16::MAX as u64),
ConstantType::I32 => value % (i32::MAX as u64),
ConstantType::U32 => value % (u32::MAX as u64),
ConstantType::I64 => value % (i64::MAX as u64),
ConstantType::U64 => value,
};
let ty = if declare_type || !genenv.allow_inference {
Some(genenv.return_type)
} else {
None
};
Value::Number(base, ty, converted_value)
})
.boxed()
}
}
impl Arbitrary for ConstantType {
type Parameters = ();
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
Union::new([
Just(ConstantType::I8),
Just(ConstantType::I16),
Just(ConstantType::I32),
Just(ConstantType::I64),
Just(ConstantType::U8),
Just(ConstantType::U16),
Just(ConstantType::U32),
Just(ConstantType::U64),
])
.boxed()
}
}

View File

@@ -1,7 +1,10 @@
use crate::syntax::Location;
use std::fmt;
use std::hash::Hash;
/// The set of valid binary operators.
pub static BINARY_OPERATORS: &[&str] = &["+", "-", "*", "/"];
use internment::ArcIntern;
pub use crate::syntax::tokens::ConstantType;
use crate::syntax::Location;
/// A structure represented a parsed program.
///
@@ -16,6 +19,56 @@ pub struct Program {
pub statements: Vec<Statement>,
}
/// A Name.
///
/// This is basically a string, but annotated with the place the string
/// is in the source file.
#[derive(Clone, Debug)]
pub struct Name {
pub name: String,
pub location: Location,
}
impl Name {
pub fn new<S: ToString>(n: S, location: Location) -> Name {
Name {
name: n.to_string(),
location,
}
}
pub fn manufactured<S: ToString>(n: S) -> Name {
Name {
name: n.to_string(),
location: Location::manufactured(),
}
}
pub fn intern(self) -> ArcIntern<String> {
ArcIntern::new(self.name)
}
}
impl PartialEq for Name {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
}
}
impl Eq for Name {}
impl Hash for Name {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.name.hash(state)
}
}
impl fmt::Display for Name {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.name.fmt(f)
}
}
/// A parsed statement.
///
/// Statements are guaranteed to be syntactically valid, but may be
@@ -29,8 +82,8 @@ pub struct Program {
/// thing, not if they are the exact same statement.
#[derive(Clone, Debug)]
pub enum Statement {
Binding(Location, String, Expression),
Print(Location, String),
Binding(Location, Name, Expression),
Print(Location, Name),
}
impl PartialEq for Statement {
@@ -58,6 +111,7 @@ impl PartialEq for Statement {
pub enum Expression {
Value(Location, Value),
Reference(Location, String),
Cast(Location, String, Box<Expression>),
Primitive(Location, String, Vec<Expression>),
}
@@ -72,6 +126,10 @@ impl PartialEq for Expression {
Expression::Reference(_, var2) => var1 == var2,
_ => false,
},
Expression::Cast(_, t1, e1) => match other {
Expression::Cast(_, t2, e2) => t1 == t2 && e1 == e2,
_ => false,
},
Expression::Primitive(_, prim1, args1) => match other {
Expression::Primitive(_, prim2, args2) => prim1 == prim2 && args1 == args2,
_ => false,
@@ -83,6 +141,12 @@ impl PartialEq for Expression {
/// A value from the source syntax
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Value {
/// The value of the number, and an optional base that it was written in
Number(Option<u8>, i64),
/// The value of the number, an optional base that it was written in, and any
/// type information provided.
///
/// u64 is chosen because it should be big enough to carry the amount of
/// information we need, and technically we interpret -4 as the primitive unary
/// operation "-" on the number 4. We'll translate this into a type-specific
/// number at a later time.
Number(Option<u8>, Option<ConstantType>, u64),
}

View File

@@ -1,7 +1,8 @@
use internment::ArcIntern;
use crate::eval::{EvalEnvironment, EvalError, Value};
use crate::syntax::{Expression, Program, Statement};
use crate::eval::{EvalEnvironment, EvalError, PrimitiveType, Value};
use crate::syntax::{ConstantType, Expression, Program, Statement};
use std::str::FromStr;
impl Program {
/// Evaluate the program, returning either an error or what it prints out when run.
@@ -24,11 +25,11 @@ impl Program {
match stmt {
Statement::Binding(_, name, value) => {
let actual_value = value.eval(&env)?;
env = env.extend(ArcIntern::new(name.clone()), actual_value);
env = env.extend(name.clone().intern(), actual_value);
}
Statement::Print(_, name) => {
let value = env.lookup(ArcIntern::new(name.clone()))?;
let value = env.lookup(name.clone().intern())?;
let line = format!("{} = {}\n", name, value);
stdout.push_str(&line);
}
@@ -43,11 +44,28 @@ impl Expression {
fn eval(&self, env: &EvalEnvironment) -> Result<Value, EvalError> {
match self {
Expression::Value(_, v) => match v {
super::Value::Number(_, v) => Ok(Value::I64(*v)),
super::Value::Number(_, ty, v) => match ty {
None => Ok(Value::U64(*v)),
// FIXME: make these types validate their input size
Some(ConstantType::I8) => Ok(Value::I8(*v as i8)),
Some(ConstantType::I16) => Ok(Value::I16(*v as i16)),
Some(ConstantType::I32) => Ok(Value::I32(*v as i32)),
Some(ConstantType::I64) => Ok(Value::I64(*v as i64)),
Some(ConstantType::U8) => Ok(Value::U8(*v as u8)),
Some(ConstantType::U16) => Ok(Value::U16(*v as u16)),
Some(ConstantType::U32) => Ok(Value::U32(*v as u32)),
Some(ConstantType::U64) => Ok(Value::U64(*v)),
},
},
Expression::Reference(_, n) => Ok(env.lookup(ArcIntern::new(n.clone()))?),
Expression::Cast(_, target, expr) => {
let target_type = PrimitiveType::from_str(target)?;
let value = expr.eval(env)?;
Ok(target_type.safe_cast(&value)?)
}
Expression::Primitive(_, op, args) => {
let mut arg_values = Vec::with_capacity(args.len());
@@ -66,12 +84,12 @@ impl Expression {
fn two_plus_three() {
let input = Program::parse(0, "x = 2 + 3; print x;").expect("parse works");
let output = input.eval().expect("runs successfully");
assert_eq!("x = 5i64\n", &output);
assert_eq!("x = 5u64\n", &output);
}
#[test]
fn lotsa_math() {
let input = Program::parse(0, "x = 2 + 3 * 10 / 5 - 1; print x;").expect("parse works");
let output = input.eval().expect("runs successfully");
assert_eq!("x = 7i64\n", &output);
assert_eq!("x = 7u64\n", &output);
}

View File

@@ -1,13 +1,15 @@
use std::ops::Range;
use codespan_reporting::diagnostic::{Diagnostic, Label};
/// A source location, for use in pointing users towards warnings and errors.
///
/// Internally, locations are very tied to the `codespan_reporting` library,
/// and the primary use of them is to serve as anchors within that library.
#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Location {
file_idx: usize,
offset: usize,
location: Range<usize>,
}
impl Location {
@@ -17,8 +19,8 @@ impl Location {
/// The file index is based on the file database being used. See the
/// `codespan_reporting::files::SimpleFiles::add` function, which is
/// normally where we get this index.
pub fn new(file_idx: usize, offset: usize) -> Self {
Location { file_idx, offset }
pub fn new(file_idx: usize, location: Range<usize>) -> Self {
Location { file_idx, location }
}
/// Generate a `Location` for a completely manufactured bit of code.
@@ -30,7 +32,7 @@ impl Location {
pub fn manufactured() -> Self {
Location {
file_idx: 0,
offset: 0,
location: 0..0,
}
}
@@ -47,7 +49,7 @@ impl Location {
/// actually happened), but you'd probably want to make the first location
/// the secondary label to help users find it.
pub fn primary_label(&self) -> Label<usize> {
Label::primary(self.file_idx, self.offset..self.offset)
Label::primary(self.file_idx, self.location.clone())
}
/// Generate a secondary label for a [`Diagnostic`], based on this source
@@ -64,35 +66,7 @@ impl Location {
/// probably want to make the first location the secondary label to help
/// users find it.
pub fn secondary_label(&self) -> Label<usize> {
Label::secondary(self.file_idx, self.offset..self.offset)
}
/// Given this location and another, generate a primary label that
/// specifies the area between those two locations.
///
/// See [`Self::primary_label`] for some discussion of primary versus
/// secondary labels. If the two locations are the same, this method does
/// the exact same thing as [`Self::primary_label`]. If this item was
/// generated by [`Self::manufactured`], it will act as if you'd called
/// `primary_label` on the argument. Otherwise, it will generate the obvious
/// span.
///
/// This function will return `None` only in the case that you provide
/// labels from two different files, which it cannot sensibly handle.
pub fn range_label(&self, end: &Location) -> Option<Label<usize>> {
if self.file_idx == 0 {
return Some(end.primary_label());
}
if self.file_idx != end.file_idx {
return None;
}
if self.offset > end.offset {
Some(Label::primary(self.file_idx, end.offset..self.offset))
} else {
Some(Label::primary(self.file_idx, self.offset..end.offset))
}
Label::secondary(self.file_idx, self.location.clone())
}
/// Return an error diagnostic centered at this location.
@@ -102,10 +76,7 @@ impl Location {
/// this particular location. You'll need to extend it with actually useful
/// information, like what kind of error it is.
pub fn error(&self) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx,
self.offset..self.offset,
)])
Diagnostic::error().with_labels(vec![Label::primary(self.file_idx, self.location.clone())])
}
/// Return an error diagnostic centered at this location, with the given message.
@@ -115,10 +86,34 @@ impl Location {
/// even more information to ut, using [`Diagnostic::with_labels`],
/// [`Diagnostic::with_notes`], or [`Diagnostic::with_code`].
pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx,
self.offset..self.offset,
)
.with_message(msg)])
Diagnostic::error().with_labels(vec![
Label::primary(self.file_idx, self.location.clone()).with_message(msg)
])
}
/// Merge two locations into a single location spanning the whole range between
/// them.
///
/// This function returns None if the locations are from different files; this
/// can happen if one of the locations is manufactured, for example.
pub fn merge(&self, other: &Self) -> Option<Self> {
if self.file_idx != other.file_idx {
None
} else {
let start = if self.location.start <= other.location.start {
self.location.start
} else {
other.location.start
};
let end = if self.location.end >= other.location.end {
self.location.end
} else {
other.location.end
};
Some(Location {
file_idx: self.file_idx,
location: start..end,
})
}
}
}

View File

@@ -9,8 +9,8 @@
//! eventually want to leave lalrpop behind.)
//!
use crate::syntax::{LexerError, Location};
use crate::syntax::ast::{Program,Statement,Expression,Value};
use crate::syntax::tokens::Token;
use crate::syntax::ast::{Program,Statement,Expression,Value,Name};
use crate::syntax::tokens::{ConstantType, Token};
use internment::ArcIntern;
// one cool thing about lalrpop: we can pass arguments. in this case, the
@@ -32,6 +32,8 @@ extern {
";" => Token::Semi,
"(" => Token::LeftParen,
")" => Token::RightParen,
"<" => Token::LessThan,
">" => Token::GreaterThan,
"print" => Token::Print,
@@ -44,7 +46,7 @@ extern {
// to name and use "their value", you get their source location.
// For these, we want "their value" to be their actual contents,
// which is why we put their types in angle brackets.
"<num>" => Token::Number((<Option<u8>>,<i64>)),
"<num>" => Token::Number((<Option<u8>>,<Option<ConstantType>>,<u64>)),
"<var>" => Token::Variable(<ArcIntern<String>>),
}
}
@@ -89,10 +91,19 @@ pub Statement: Statement = {
// A statement can be a variable binding. Note, here, that we use this
// funny @L thing to get the source location before the variable, so that
// we can say that this statement spans across everything.
<l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e),
<ls: @L> <v:"<var>"> <var_end: @L> "=" <e:Expression> ";" <le: @L> =>
Statement::Binding(
Location::new(file_idx, ls..le),
Name::new(v, Location::new(file_idx, ls..var_end)),
e,
),
// Alternatively, a statement can just be a print statement.
"print" <l:@L> <v:"<var>"> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()),
<ls: @L> "print" <name_start: @L> <v:"<var>"> <name_end: @L> ";" <le: @L> =>
Statement::Print(
Location::new(file_idx, ls..le),
Name::new(v, Location::new(file_idx, name_start..name_end)),
),
}
// Expressions! Expressions are a little fiddly, because we're going to
@@ -124,15 +135,27 @@ Expression: Expression = {
// we group addition and subtraction under the heading "additive"
AdditiveExpression: Expression = {
<e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]),
<e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]),
<ls: @L> <e1:AdditiveExpression> <l: @L> "+" <e2:MultiplicativeExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "+".to_string(), vec![e1, e2]),
<ls: @L> <e1:AdditiveExpression> <l: @L> "-" <e2:MultiplicativeExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "-".to_string(), vec![e1, e2]),
MultiplicativeExpression,
}
// similarly, we group multiplication and division under "multiplicative"
MultiplicativeExpression: Expression = {
<e1:MultiplicativeExpression> <l:@L> "*" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]),
<e1:MultiplicativeExpression> <l:@L> "/" <e2:AtomicExpression> => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]),
<ls: @L> <e1:MultiplicativeExpression> <l: @L> "*" <e2:UnaryExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "*".to_string(), vec![e1, e2]),
<ls: @L> <e1:MultiplicativeExpression> <l: @L> "/" <e2:UnaryExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "/".to_string(), vec![e1, e2]),
UnaryExpression,
}
UnaryExpression: Expression = {
<l: @L> "-" <e:UnaryExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, l..le), "-".to_string(), vec![e]),
<l: @L> "<" <v:"<var>"> ">" <e:UnaryExpression> <le: @L> =>
Expression::Cast(Location::new(file_idx, l..le), v.to_string(), Box::new(e)),
AtomicExpression,
}
@@ -140,22 +163,9 @@ MultiplicativeExpression: Expression = {
// they cannot be further divided into parts
AtomicExpression: Expression = {
// just a variable reference
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
<l: @L> <v:"<var>"> <end: @L> => Expression::Reference(Location::new(file_idx, l..end), v.to_string()),
// just a number
<l:@L> <n:"<num>"> => {
let val = Value::Number(n.0, n.1);
Expression::Value(Location::new(file_idx, l), val)
},
// a tricky case: also just a number, but using a negative sign. an
// alternative way to do this -- and we may do this eventually -- is
// to implement a unary negation expression. this has the odd effect
// that the user never actually writes down a negative number; they just
// write positive numbers which are immediately sent to a negation
// primitive!
<l:@L> "-" <n:"<num>"> => {
let val = Value::Number(n.0, -n.1);
Expression::Value(Location::new(file_idx, l), val)
},
<l: @L> <n:"<num>"> <end: @L> => Expression::Value(Location::new(file_idx, l..end), Value::Number(n.0, n.1, n.2)),
// finally, let people parenthesize expressions and get back to a
// lower precedence
"(" <e:Expression> ")" => e,

View File

@@ -1,6 +1,8 @@
use crate::syntax::ast::{Expression, Program, Statement, Value, BINARY_OPERATORS};
use crate::syntax::ast::{Expression, Program, Statement, Value};
use pretty::{DocAllocator, DocBuilder, Pretty};
use super::ConstantType;
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
where
A: 'a,
@@ -50,14 +52,14 @@ where
match self {
Expression::Value(_, val) => val.pretty(allocator),
Expression::Reference(_, var) => allocator.text(var.to_string()),
Expression::Primitive(_, op, exprs) if BINARY_OPERATORS.contains(&op.as_ref()) => {
assert_eq!(
exprs.len(),
2,
"Found binary operator with {} components?",
exprs.len()
);
Expression::Cast(_, t, e) => allocator
.text(t.clone())
.angles()
.append(e.pretty(allocator)),
Expression::Primitive(_, op, exprs) if exprs.len() == 1 => allocator
.text(op.to_string())
.append(exprs[0].pretty(allocator)),
Expression::Primitive(_, op, exprs) if exprs.len() == 2 => {
let left = exprs[0].pretty(allocator);
let right = exprs[1].pretty(allocator);
@@ -84,15 +86,14 @@ where
{
fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> {
match self {
Value::Number(opt_base, value) => {
let sign = if *value < 0 { "-" } else { "" };
Value::Number(opt_base, ty, value) => {
let value_str = match opt_base {
None => format!("{}", value),
Some(2) => format!("{}0b{:b}", sign, value.abs()),
Some(8) => format!("{}0o{:o}", sign, value.abs()),
Some(10) => format!("{}0d{}", sign, value.abs()),
Some(16) => format!("{}0x{:x}", sign, value.abs()),
Some(_) => format!("!!{}{:x}!!", sign, value.abs()),
None => format!("{}{}", value, type_suffix(ty)),
Some(2) => format!("0b{:b}{}", value, type_suffix(ty)),
Some(8) => format!("0o{:o}{}", value, type_suffix(ty)),
Some(10) => format!("0d{}{}", value, type_suffix(ty)),
Some(16) => format!("0x{:x}{}", value, type_suffix(ty)),
Some(_) => format!("!!{:x}{}!!", value, type_suffix(ty)),
};
allocator.text(value_str)
@@ -101,6 +102,20 @@ where
}
}
fn type_suffix(x: &Option<ConstantType>) -> &'static str {
match x {
None => "",
Some(ConstantType::I8) => "i8",
Some(ConstantType::I16) => "i16",
Some(ConstantType::I32) => "i32",
Some(ConstantType::I64) => "i64",
Some(ConstantType::U8) => "u8",
Some(ConstantType::U16) => "u16",
Some(ConstantType::U32) => "u32",
Some(ConstantType::U64) => "u64",
}
}
#[derive(Clone, Copy)]
struct CommaSep {}

View File

@@ -40,6 +40,12 @@ pub enum Token {
#[token(")")]
RightParen,
#[token("<")]
LessThan,
#[token(">")]
GreaterThan,
// Next we take of any reserved words; I always like to put
// these before we start recognizing more complicated regular
// expressions. I don't think it matters, but it works for me.
@@ -53,13 +59,14 @@ pub enum Token {
/// Numbers capture both the value we read from the input,
/// converted to an `i64`, as well as the base the user used
/// to write the number, if they did so.
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
#[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))]
#[regex(r"[0-9]+", |v| parse_number(None, v))]
Number((Option<u8>, i64)),
/// to write the number and/or the type the user specified,
/// if they did either.
#[regex(r"0b[01]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(2), v))]
#[regex(r"0o[0-7]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(8), v))]
#[regex(r"0d[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(10), v))]
#[regex(r"0x[0-9a-fA-F]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(16), v))]
#[regex(r"[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(None, v))]
Number((Option<u8>, Option<ConstantType>, u64)),
// Variables; this is a very standard, simple set of characters
// for variables, but feel free to experiment with more complicated
@@ -88,15 +95,29 @@ impl fmt::Display for Token {
Token::Semi => write!(f, "';'"),
Token::LeftParen => write!(f, "'('"),
Token::RightParen => write!(f, "')'"),
Token::LessThan => write!(f, "<"),
Token::GreaterThan => write!(f, ">"),
Token::Print => write!(f, "'print'"),
Token::Operator(c) => write!(f, "'{}'", c),
Token::Number((None, v)) => write!(f, "'{}'", v),
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v),
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
Token::Number((Some(10), v)) => write!(f, "'{}'", v),
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
Token::Number((Some(b), v)) => {
write!(f, "Invalidly-based-number<base={},val={}>", b, v)
Token::Number((None, otype, v)) => write!(f, "'{}{}'", v, display_optional_type(otype)),
Token::Number((Some(2), otype, v)) => {
write!(f, "'0b{:b}{}'", v, display_optional_type(otype))
}
Token::Number((Some(8), otype, v)) => {
write!(f, "'0o{:o}{}'", v, display_optional_type(otype))
}
Token::Number((Some(10), otype, v)) => {
write!(f, "'{}{}'", v, display_optional_type(otype))
}
Token::Number((Some(16), otype, v)) => {
write!(f, "'0x{:x}{}'", v, display_optional_type(otype))
}
Token::Number((Some(b), opt_type, v)) => {
write!(
f,
"Invalidly-based-number<base={},val={},opt_type={:?}>",
b, v, opt_type
)
}
Token::Variable(s) => write!(f, "'{}'", s),
Token::Error => write!(f, "<error>"),
@@ -122,6 +143,125 @@ impl Token {
}
}
#[repr(i64)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConstantType {
U8 = 10,
U16 = 11,
U32 = 12,
U64 = 13,
I8 = 20,
I16 = 21,
I32 = 22,
I64 = 23,
}
impl From<ConstantType> for cranelift_codegen::ir::Type {
fn from(value: ConstantType) -> Self {
match value {
ConstantType::I8 | ConstantType::U8 => cranelift_codegen::ir::types::I8,
ConstantType::I16 | ConstantType::U16 => cranelift_codegen::ir::types::I16,
ConstantType::I32 | ConstantType::U32 => cranelift_codegen::ir::types::I32,
ConstantType::I64 | ConstantType::U64 => cranelift_codegen::ir::types::I64,
}
}
}
impl ConstantType {
/// Returns true if the given type is (a) numeric and (b) signed;
pub fn is_signed(&self) -> bool {
matches!(
self,
ConstantType::I8 | ConstantType::I16 | ConstantType::I32 | ConstantType::I64
)
}
/// Return the set of types that can be safely casted into this type.
pub fn safe_casts_to(self) -> Vec<ConstantType> {
match self {
ConstantType::I8 => vec![ConstantType::I8],
ConstantType::I16 => vec![ConstantType::I16, ConstantType::I8, ConstantType::U8],
ConstantType::I32 => vec![
ConstantType::I32,
ConstantType::I16,
ConstantType::I8,
ConstantType::U16,
ConstantType::U8,
],
ConstantType::I64 => vec![
ConstantType::I64,
ConstantType::I32,
ConstantType::I16,
ConstantType::I8,
ConstantType::U32,
ConstantType::U16,
ConstantType::U8,
],
ConstantType::U8 => vec![ConstantType::U8],
ConstantType::U16 => vec![ConstantType::U16, ConstantType::U8],
ConstantType::U32 => vec![ConstantType::U32, ConstantType::U16, ConstantType::U8],
ConstantType::U64 => vec![
ConstantType::U64,
ConstantType::U32,
ConstantType::U16,
ConstantType::U8,
],
}
}
/// Return the set of all currently-available constant types
pub fn all_types() -> Vec<Self> {
vec![
ConstantType::U8,
ConstantType::U16,
ConstantType::U32,
ConstantType::U64,
ConstantType::I8,
ConstantType::I16,
ConstantType::I32,
ConstantType::I64,
]
}
/// Return the name of the given type, as a string
pub fn name(&self) -> String {
match self {
ConstantType::I8 => "i8".to_string(),
ConstantType::I16 => "i16".to_string(),
ConstantType::I32 => "i32".to_string(),
ConstantType::I64 => "i64".to_string(),
ConstantType::U8 => "u8".to_string(),
ConstantType::U16 => "u16".to_string(),
ConstantType::U32 => "u32".to_string(),
ConstantType::U64 => "u64".to_string(),
}
}
}
#[derive(Debug, Error, PartialEq)]
pub enum InvalidConstantType {
#[error("Unrecognized constant {0} for constant type")]
Value(i64),
}
impl TryFrom<i64> for ConstantType {
type Error = InvalidConstantType;
fn try_from(value: i64) -> Result<Self, Self::Error> {
match value {
10 => Ok(ConstantType::U8),
11 => Ok(ConstantType::U16),
12 => Ok(ConstantType::U32),
13 => Ok(ConstantType::U64),
20 => Ok(ConstantType::I8),
21 => Ok(ConstantType::I16),
22 => Ok(ConstantType::I32),
23 => Ok(ConstantType::I64),
_ => Err(InvalidConstantType::Value(value)),
}
}
}
/// Parse a number in the given base, return a pair of the base and the
/// parsed number. This is just a helper used for all of the number
/// regular expression cases, which kicks off to the obvious Rust
@@ -129,24 +269,66 @@ impl Token {
fn parse_number(
base: Option<u8>,
value: &Lexer<Token>,
) -> Result<(Option<u8>, i64), ParseIntError> {
) -> Result<(Option<u8>, Option<ConstantType>, u64), ParseIntError> {
let (radix, strval) = match base {
None => (10, value.slice()),
Some(radix) => (radix, &value.slice()[2..]),
};
let intval = i64::from_str_radix(strval, radix as u32)?;
Ok((base, intval))
let (declared_type, strval) = if let Some(strval) = strval.strip_suffix("u8") {
(Some(ConstantType::U8), strval)
} else if let Some(strval) = strval.strip_suffix("u16") {
(Some(ConstantType::U16), strval)
} else if let Some(strval) = strval.strip_suffix("u32") {
(Some(ConstantType::U32), strval)
} else if let Some(strval) = strval.strip_suffix("u64") {
(Some(ConstantType::U64), strval)
} else if let Some(strval) = strval.strip_suffix("i8") {
(Some(ConstantType::I8), strval)
} else if let Some(strval) = strval.strip_suffix("i16") {
(Some(ConstantType::I16), strval)
} else if let Some(strval) = strval.strip_suffix("i32") {
(Some(ConstantType::I32), strval)
} else if let Some(strval) = strval.strip_suffix("i64") {
(Some(ConstantType::I64), strval)
} else {
(None, strval)
};
let intval = u64::from_str_radix(strval, radix as u32)?;
Ok((base, declared_type, intval))
}
fn display_optional_type(otype: &Option<ConstantType>) -> &'static str {
match otype {
None => "",
Some(ConstantType::I8) => "i8",
Some(ConstantType::I16) => "i16",
Some(ConstantType::I32) => "i32",
Some(ConstantType::I64) => "i64",
Some(ConstantType::U8) => "u8",
Some(ConstantType::U16) => "u16",
Some(ConstantType::U32) => "u32",
Some(ConstantType::U64) => "u64",
}
}
#[test]
fn lex_numbers() {
let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9");
assert_eq!(lex0.next(), Some(Token::Number((None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(2), 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(8), 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(10), 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(16), 12))));
let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc 12u8 0xci64// 9");
assert_eq!(lex0.next(), Some(Token::Number((None, None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(2), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(8), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(10), None, 12))));
assert_eq!(lex0.next(), Some(Token::Number((Some(16), None, 12))));
assert_eq!(
lex0.next(),
Some(Token::Number((None, Some(ConstantType::U8), 12)))
);
assert_eq!(
lex0.next(),
Some(Token::Number((Some(16), Some(ConstantType::I64), 12)))
);
assert_eq!(lex0.next(), None);
}
@@ -168,6 +350,31 @@ fn lexer_spans() {
assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
assert_eq!(lex0.next(), Some((Token::var("x"), 4..5)));
assert_eq!(lex0.next(), Some((Token::Operator('+'), 6..7)));
assert_eq!(lex0.next(), Some((Token::Number((None, 1)), 8..9)));
assert_eq!(lex0.next(), Some((Token::Number((None, None, 1)), 8..9)));
assert_eq!(lex0.next(), None);
}
#[test]
fn further_spans() {
let mut lex0 = Token::lexer("x = 2i64 + 2i64;\ny = -x;\nprint y;").spanned();
assert_eq!(lex0.next(), Some((Token::var("x"), 0..1)));
assert_eq!(lex0.next(), Some((Token::Equals, 2..3)));
assert_eq!(
lex0.next(),
Some((Token::Number((None, Some(ConstantType::I64), 2)), 4..8))
);
assert_eq!(lex0.next(), Some((Token::Operator('+'), 9..10)));
assert_eq!(
lex0.next(),
Some((Token::Number((None, Some(ConstantType::I64), 2)), 11..15))
);
assert_eq!(lex0.next(), Some((Token::Semi, 15..16)));
assert_eq!(lex0.next(), Some((Token::var("y"), 17..18)));
assert_eq!(lex0.next(), Some((Token::Equals, 19..20)));
assert_eq!(lex0.next(), Some((Token::Operator('-'), 21..22)));
assert_eq!(lex0.next(), Some((Token::var("x"), 22..23)));
assert_eq!(lex0.next(), Some((Token::Semi, 23..24)));
assert_eq!(lex0.next(), Some((Token::Print, 25..30)));
assert_eq!(lex0.next(), Some((Token::var("y"), 31..32)));
assert_eq!(lex0.next(), Some((Token::Semi, 32..33)));
}

View File

@@ -1,6 +1,9 @@
use crate::syntax::{Expression, Location, Program, Statement};
use crate::{
eval::PrimitiveType,
syntax::{Expression, Location, Program, Statement},
};
use codespan_reporting::diagnostic::Diagnostic;
use std::collections::HashMap;
use std::{collections::HashMap, str::FromStr};
/// An error we found while validating the input program.
///
@@ -11,6 +14,7 @@ use std::collections::HashMap;
/// and using [`codespan_reporting`] to present them to the user.
pub enum Error {
UnboundVariable(Location, String),
UnknownType(Location, String),
}
impl From<Error> for Diagnostic<usize> {
@@ -19,6 +23,10 @@ impl From<Error> for Diagnostic<usize> {
Error::UnboundVariable(location, name) => location
.labelled_error("unbound here")
.with_message(format!("Unbound variable '{}'", name)),
Error::UnknownType(location, name) => location
.labelled_error("type referenced here")
.with_message(format!("Unknown type '{}'", name)),
}
}
}
@@ -57,12 +65,24 @@ impl Program {
/// example, and generates warnings for things that are inadvisable but not
/// actually a problem.
pub fn validate(&self) -> (Vec<Error>, Vec<Warning>) {
let mut bound_variables = HashMap::new();
self.validate_with_bindings(&mut bound_variables)
}
/// Validate that the program makes semantic sense, not just syntactic sense.
///
/// This checks for things like references to variables that don't exist, for
/// example, and generates warnings for things that are inadvisable but not
/// actually a problem.
pub fn validate_with_bindings(
&self,
bound_variables: &mut HashMap<String, Location>,
) -> (Vec<Error>, Vec<Warning>) {
let mut errors = vec![];
let mut warnings = vec![];
let mut bound_variables = HashMap::new();
for stmt in self.statements.iter() {
let (mut new_errors, mut new_warnings) = stmt.validate(&mut bound_variables);
let (mut new_errors, mut new_warnings) = stmt.validate(bound_variables);
errors.append(&mut new_errors);
warnings.append(&mut new_warnings);
}
@@ -81,7 +101,7 @@ impl Statement {
/// occurs. We use a `HashMap` to map these bound locations to the locations
/// where their bound, because these locations are handy when generating errors
/// and warnings.
pub fn validate(
fn validate(
&self,
bound_variables: &mut HashMap<String, Location>,
) -> (Vec<Error>, Vec<Warning>) {
@@ -97,20 +117,20 @@ impl Statement {
errors.append(&mut exp_errors);
warnings.append(&mut exp_warnings);
if let Some(original_binding_site) = bound_variables.get(var) {
if let Some(original_binding_site) = bound_variables.get(&var.name) {
warnings.push(Warning::ShadowedVariable(
original_binding_site.clone(),
loc.clone(),
var.clone(),
var.to_string(),
));
} else {
bound_variables.insert(var.clone(), loc.clone());
bound_variables.insert(var.to_string(), loc.clone());
}
}
Statement::Print(_, var) if bound_variables.contains_key(var) => {}
Statement::Print(_, var) if bound_variables.contains_key(&var.name) => {}
Statement::Print(loc, var) => {
errors.push(Error::UnboundVariable(loc.clone(), var.clone()))
errors.push(Error::UnboundVariable(loc.clone(), var.to_string()))
}
}
@@ -127,6 +147,15 @@ impl Expression {
vec![Error::UnboundVariable(loc.clone(), var.clone())],
vec![],
),
Expression::Cast(location, t, expr) => {
let (mut errs, warns) = expr.validate(variable_map);
if PrimitiveType::from_str(t).is_err() {
errs.push(Error::UnknownType(location.clone(), t.clone()))
}
(errs, warns)
}
Expression::Primitive(_, _, args) => {
let mut errors = vec![];
let mut warnings = vec![];
@@ -142,3 +171,19 @@ impl Expression {
}
}
}
#[test]
fn cast_checks_are_reasonable() {
let good_stmt = Statement::parse(0, "x = <u16>4u8;").expect("valid test case");
let (good_errs, good_warns) = good_stmt.validate(&mut HashMap::new());
assert!(good_errs.is_empty());
assert!(good_warns.is_empty());
let bad_stmt = Statement::parse(0, "x = <apple>4u8;").expect("valid test case");
let (bad_errs, bad_warns) = bad_stmt.validate(&mut HashMap::new());
assert!(bad_warns.is_empty());
assert_eq!(bad_errs.len(), 1);
assert!(matches!(bad_errs[0], Error::UnknownType(_, ref x) if x == "apple"));
}