From 469fe35e4620a436ad6a4f5446bf7a36d5f11ab9 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sun, 4 Jun 2023 17:31:26 -0700 Subject: [PATCH] Add support for multiple integer types. --- examples/basic/test2.ngr | 4 ++ runtime/rts.c | 33 +++++++++-- src/backend.rs | 14 +++-- src/backend/into_crane.rs | 119 +++++++++++++++++++++++++++----------- src/backend/runtime.rs | 26 +++++++-- src/eval/env.rs | 12 ++-- src/eval/primop.rs | 68 +++++++++++++++++++--- src/eval/value.rs | 56 ++++++++++++++++++ src/ir/ast.rs | 65 +++++++++++++++------ src/ir/eval.rs | 17 ++++-- src/ir/from_syntax.rs | 12 +++- src/repl.rs | 5 +- src/syntax.rs | 6 +- src/syntax/arbitrary.rs | 107 ++++++++++++++++++++++++---------- src/syntax/ast.rs | 13 ++++- src/syntax/eval.rs | 19 ++++-- src/syntax/parser.lalrpop | 17 +----- src/syntax/pretty.rs | 39 +++++++++---- src/syntax/tokens.rs | 65 +++++++++++++++++---- 19 files changed, 528 insertions(+), 169 deletions(-) create mode 100644 examples/basic/test2.ngr diff --git a/examples/basic/test2.ngr b/examples/basic/test2.ngr new file mode 100644 index 0000000..0c9dd6d --- /dev/null +++ b/examples/basic/test2.ngr @@ -0,0 +1,4 @@ +// this test is useful for making sure we don't accidentally +// use a signed divide operation anywhere important +a = 96u8 / 160u8; +print a; \ No newline at end of file diff --git a/runtime/rts.c b/runtime/rts.c index 4cf41e5..a999bae 100644 --- a/runtime/rts.c +++ b/runtime/rts.c @@ -2,12 +2,33 @@ #include #include -void print(char *_ignore, char *variable_name, int64_t value) { - printf("%s = %" PRId64 "i64\n", variable_name, value); -} - -void caller() { - print(NULL, "x", 4); +void print(char *_ignore, char *variable_name, int64_t vtype, int64_t value) { + switch(vtype) { + case /* U8 = */ 10: + printf("%s = %" PRIu8 "u8\n", variable_name, (uint8_t)value); + break; + case /* U16 = */ 11: + printf("%s = %" PRIu16 "u16\n", variable_name, (uint16_t)value); + break; + case /* U32 = */ 12: + printf("%s = %" PRIu32 "u32\n", variable_name, (uint32_t)value); + break; + case /* U64 = */ 13: + printf("%s = %" PRIu64 "u64\n", variable_name, (uint64_t)value); + break; + case /* I8 = */ 20: + printf("%s = %" PRIi8 "i8\n", variable_name, (int8_t)value); + break; + case /* I16 = */ 21: + printf("%s = %" PRIi16 "i16\n", variable_name, (int16_t)value); + break; + case /* I32 = */ 22: + printf("%s = %" PRIi32 "i32\n", variable_name, (int32_t)value); + break; + case /* I64 = */ 23: + printf("%s = %" PRIi64 "i64\n", variable_name, value); + break; + } } extern void gogogo(); diff --git a/src/backend.rs b/src/backend.rs index b6b8808..c602135 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -31,15 +31,15 @@ mod eval; mod into_crane; mod runtime; -use std::collections::HashMap; - pub use self::error::BackendError; pub use self::runtime::{RuntimeFunctionError, RuntimeFunctions}; +use crate::syntax::ConstantType; use cranelift_codegen::settings::Configurable; use cranelift_codegen::{isa, settings}; use cranelift_jit::{JITBuilder, JITModule}; use cranelift_module::{default_libcall_names, DataContext, DataId, FuncId, Linkage, Module}; use cranelift_object::{ObjectBuilder, ObjectModule}; +use std::collections::HashMap; use target_lexicon::Triple; const EMPTY_DATUM: [u8; 8] = [0; 8]; @@ -58,7 +58,7 @@ pub struct Backend { data_ctx: DataContext, runtime_functions: RuntimeFunctions, defined_strings: HashMap, - defined_symbols: HashMap, + defined_symbols: HashMap, output_buffer: Option, } @@ -167,14 +167,18 @@ impl Backend { /// These variables can be shared between functions, and will be exported from the /// module itself as public data in the case of static compilation. There initial /// value will be null. - pub fn define_variable(&mut self, name: String) -> Result { + pub fn define_variable( + &mut self, + name: String, + ctype: ConstantType, + ) -> Result { self.data_ctx.define(Box::new(EMPTY_DATUM)); let id = self .module .declare_data(&name, Linkage::Export, true, false)?; self.module.define_data(id, &self.data_ctx)?; self.data_ctx.clear(); - self.defined_symbols.insert(name, id); + self.defined_symbols.insert(name, (id, ctype)); Ok(id) } diff --git a/src/backend/into_crane.rs b/src/backend/into_crane.rs index 5965a9f..f0f4e25 100644 --- a/src/backend/into_crane.rs +++ b/src/backend/into_crane.rs @@ -1,9 +1,10 @@ use std::collections::HashMap; use crate::ir::{Expression, Primitive, Program, Statement, Value, ValueOrRef}; +use crate::syntax::ConstantType; use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir::{ - entities, types, Function, GlobalValue, InstBuilder, MemFlags, Signature, UserFuncName, + self, entities, types, Function, GlobalValue, InstBuilder, MemFlags, Signature, UserFuncName, }; use cranelift_codegen::isa::CallConv; use cranelift_codegen::Context; @@ -85,12 +86,12 @@ impl Backend { // Just like with strings, generating the `GlobalValue`s we need can potentially // be a little tricky to do on the fly, so we generate the complete list right // here and then use it later. - let pre_defined_symbols: HashMap = self + let pre_defined_symbols: HashMap = self .defined_symbols .iter() - .map(|(k, v)| { + .map(|(k, (v, t))| { let local_data = self.module.declare_data_in_func(*v, &mut ctx.func); - (k.clone(), local_data) + (k.clone(), (local_data, *t)) }) .collect(); @@ -135,31 +136,47 @@ impl Backend { // Look up the value for the variable. Because this might be a // global variable (and that requires special logic), we just turn // this into an `Expression` and re-use the logic in that implementation. - let val = Expression::Reference(ann, var).into_crane( + let (val, vtype) = Expression::Reference(ann, var).into_crane( &mut builder, &variable_table, &pre_defined_symbols, )?; + let vtype_repr = builder.ins().iconst(types::I64, vtype as i64); + + let casted_val = match vtype { + ConstantType::U64 | ConstantType::I64 => val, + ConstantType::I8 | ConstantType::I16 | ConstantType::I32 => { + builder.ins().sextend(types::I64, val) + } + ConstantType::U8 | ConstantType::U16 | ConstantType::U32 => { + builder.ins().uextend(types::I64, val) + } + }; + // Finally, we can generate the call to print. - builder - .ins() - .call(print_func_ref, &[buffer_ptr, name_ptr, val]); + builder.ins().call( + print_func_ref, + &[buffer_ptr, name_ptr, vtype_repr, casted_val], + ); } // Variable binding is a little more con Statement::Binding(_, var_name, value) => { // Kick off to the `Expression` implementation to see what value we're going // to bind to this variable. - let val = + let (val, etype) = value.into_crane(&mut builder, &variable_table, &pre_defined_symbols)?; // Now the question is: is this a local variable, or a global one? - if let Some(global_id) = pre_defined_symbols.get(var_name.as_str()) { + if let Some((global_id, ctype)) = pre_defined_symbols.get(var_name.as_str()) { // It's a global variable! In this case, we assume that someone has already // dedicated some space in memory to store this value. We look this location // up, and then tell Cranelift to store the value there. - let val_ptr = builder.ins().symbol_value(types::I64, *global_id); + assert_eq!(etype, *ctype); + let val_ptr = builder + .ins() + .symbol_value(ir::Type::from(*ctype), *global_id); builder.ins().store(MemFlags::new(), val, val_ptr, 0); } else { // It's a local variable! In this case, we need to allocate a new Cranelift @@ -171,12 +188,10 @@ impl Backend { next_var_num += 1; // We can add the variable directly to our local variable map; it's `Copy`. - variable_table.insert(var_name, var); + variable_table.insert(var_name, (var, etype)); - // Now we tell Cranelift about our new variable, which has type I64 because - // everything we have at this point is of type I64. Once it's declare, we - // define it as having the value we computed above. - builder.declare_var(var, types::I64); + // Now we tell Cranelift about our new variable! + builder.declare_var(var, ir::Type::from(etype)); builder.def_var(var, val); } } @@ -231,26 +246,60 @@ impl Expression { fn into_crane( self, builder: &mut FunctionBuilder, - local_variables: &HashMap, Variable>, - global_variables: &HashMap, - ) -> Result { + local_variables: &HashMap, (Variable, ConstantType)>, + global_variables: &HashMap, + ) -> Result<(entities::Value, ConstantType), BackendError> { match self { // Values are pretty straightforward to compile, mostly because we only // have one type of variable, and it's an integer type. - Expression::Value(_, Value::Number(_, v)) => Ok(builder.ins().iconst(types::I64, v)), + Expression::Value(_, val) => match val { + Value::I8(_, v) => { + Ok((builder.ins().iconst(types::I8, v as i64), ConstantType::I8)) + } + Value::I16(_, v) => Ok(( + builder.ins().iconst(types::I16, v as i64), + ConstantType::I16, + )), + Value::I32(_, v) => Ok(( + builder.ins().iconst(types::I32, v as i64), + ConstantType::I32, + )), + Value::I64(_, v) => Ok((builder.ins().iconst(types::I64, v), ConstantType::I64)), + Value::U8(_, v) => { + Ok((builder.ins().iconst(types::I8, v as i64), ConstantType::U8)) + } + Value::U16(_, v) => Ok(( + builder.ins().iconst(types::I16, v as i64), + ConstantType::U16, + )), + Value::U32(_, v) => Ok(( + builder.ins().iconst(types::I32, v as i64), + ConstantType::U32, + )), + Value::U64(_, v) => Ok(( + builder.ins().iconst(types::I64, v as i64), + ConstantType::U64, + )), + }, Expression::Reference(_, name) => { // first we see if this is a local variable (which is nicer, from an // optimization point of view.) - if let Some(local_var) = local_variables.get(&name) { - return Ok(builder.use_var(*local_var)); + if let Some((local_var, etype)) = local_variables.get(&name) { + return Ok((builder.use_var(*local_var), *etype)); } // then we check to see if this is a global reference, which requires us to // first lookup where the value is stored, and then load it. - if let Some(global_var) = global_variables.get(name.as_ref()) { - let val_ptr = builder.ins().symbol_value(types::I64, *global_var); - return Ok(builder.ins().load(types::I64, MemFlags::new(), val_ptr, 0)); + if let Some((global_var, etype)) = global_variables.get(name.as_ref()) { + let cranelift_type = ir::Type::from(*etype); + let val_ptr = builder.ins().symbol_value(cranelift_type, *global_var); + return Ok(( + builder + .ins() + .load(cranelift_type, MemFlags::new(), val_ptr, 0), + *etype, + )); } // this should never happen, because we should have made sure that there are @@ -260,25 +309,27 @@ impl Expression { Expression::Primitive(_, prim, mut vals) => { // we're going to use `pop`, so we're going to pull and compile the right value ... - let right = + let (right, rtype) = vals.pop() .unwrap() .into_crane(builder, local_variables, global_variables)?; // ... and then the left. - let left = + let (left, ltype) = vals.pop() .unwrap() .into_crane(builder, local_variables, global_variables)?; + assert_eq!(rtype, ltype, "primitive argument types match"); // then we just need to tell Cranelift how to do each of our primitives! Much // like Statements, above, we probably want to eventually shuffle this off into // a separate function (maybe something off `Primitive`), but for now it's simple // enough that we just do the `match` here. match prim { - Primitive::Plus => Ok(builder.ins().iadd(left, right)), - Primitive::Minus => Ok(builder.ins().isub(left, right)), - Primitive::Times => Ok(builder.ins().imul(left, right)), - Primitive::Divide => Ok(builder.ins().sdiv(left, right)), + Primitive::Plus => Ok((builder.ins().iadd(left, right), ltype)), + Primitive::Minus => Ok((builder.ins().isub(left, right), ltype)), + Primitive::Times => Ok((builder.ins().imul(left, right), ltype)), + Primitive::Divide if rtype.is_signed() => Ok((builder.ins().sdiv(left, right), ltype)), + Primitive::Divide => Ok((builder.ins().udiv(left, right), ltype)), } } } @@ -291,9 +342,9 @@ impl ValueOrRef { fn into_crane( self, builder: &mut FunctionBuilder, - local_variables: &HashMap, Variable>, - global_variables: &HashMap, - ) -> Result { + local_variables: &HashMap, (Variable, ConstantType)>, + global_variables: &HashMap, + ) -> Result<(entities::Value, ConstantType), BackendError> { Expression::from(self).into_crane(builder, local_variables, global_variables) } } diff --git a/src/backend/runtime.rs b/src/backend/runtime.rs index a03acf7..f766b97 100644 --- a/src/backend/runtime.rs +++ b/src/backend/runtime.rs @@ -8,6 +8,8 @@ use std::fmt::Write; use target_lexicon::Triple; use thiserror::Error; +use crate::syntax::ConstantType; + /// An object for querying / using functions built into the runtime. /// /// Right now, this is a quite a bit of boilerplate for very nebulous @@ -49,7 +51,7 @@ impl RuntimeFunctions { "print", Linkage::Import, &Signature { - params: vec![string_param, string_param, int64_param], + params: vec![string_param, string_param, int64_param, int64_param], returns: vec![], call_conv: CallConv::triple_default(platform), }, @@ -98,13 +100,29 @@ impl RuntimeFunctions { // we extend with the output, so that multiple JIT'd `Program`s can run concurrently // without stomping over each other's output. If `output_buffer` is NULL, we just print // to stdout. -extern "C" fn runtime_print(output_buffer: *mut String, name: *const i8, value: i64) { +extern "C" fn runtime_print( + output_buffer: *mut String, + name: *const i8, + vtype_repr: i64, + value: i64, +) { let cstr = unsafe { CStr::from_ptr(name) }; let reconstituted = cstr.to_string_lossy(); + let vtype = match vtype_repr.try_into() { + Ok(ConstantType::I8) => "i8", + Ok(ConstantType::I16) => "i16", + Ok(ConstantType::I32) => "i32", + Ok(ConstantType::I64) => "i64", + Ok(ConstantType::U8) => "u8", + Ok(ConstantType::U16) => "u16", + Ok(ConstantType::U32) => "u32", + Ok(ConstantType::U64) => "u64", + Err(_) => "", + }; if let Some(output_buffer) = unsafe { output_buffer.as_mut() } { - writeln!(output_buffer, "{} = {}i64", reconstituted, value).unwrap(); + writeln!(output_buffer, "{} = {}{}", reconstituted, value, vtype).unwrap(); } else { - println!("{} = {}", reconstituted, value); + println!("{} = {}{}", reconstituted, value, vtype); } } diff --git a/src/eval/env.rs b/src/eval/env.rs index a1a0320..551570c 100644 --- a/src/eval/env.rs +++ b/src/eval/env.rs @@ -87,9 +87,9 @@ mod tests { let tester = tester.extend(arced("bar"), 2i64.into()); let tester = tester.extend(arced("goo"), 5i64.into()); - assert_eq!(tester.lookup(arced("foo")), Ok(1.into())); - assert_eq!(tester.lookup(arced("bar")), Ok(2.into())); - assert_eq!(tester.lookup(arced("goo")), Ok(5.into())); + assert_eq!(tester.lookup(arced("foo")), Ok(1i64.into())); + assert_eq!(tester.lookup(arced("bar")), Ok(2i64.into())); + assert_eq!(tester.lookup(arced("goo")), Ok(5i64.into())); assert!(tester.lookup(arced("baz")).is_err()); } @@ -103,14 +103,14 @@ mod tests { check_nested(&tester); - assert_eq!(tester.lookup(arced("foo")), Ok(1.into())); + assert_eq!(tester.lookup(arced("foo")), Ok(1i64.into())); assert!(tester.lookup(arced("bar")).is_err()); } fn check_nested(env: &EvalEnvironment) { let nested_env = env.extend(arced("bar"), 2i64.into()); - assert_eq!(nested_env.lookup(arced("foo")), Ok(1.into())); - assert_eq!(nested_env.lookup(arced("bar")), Ok(2.into())); + assert_eq!(nested_env.lookup(arced("foo")), Ok(1i64.into())); + assert_eq!(nested_env.lookup(arced("bar")), Ok(2i64.into())); } fn arced(s: &str) -> ArcIntern { diff --git a/src/eval/primop.rs b/src/eval/primop.rs index 49c014c..ffc74f2 100644 --- a/src/eval/primop.rs +++ b/src/eval/primop.rs @@ -61,15 +61,69 @@ macro_rules! run_op { impl Value { fn binary_op(operation: &str, left: &Value, right: &Value) -> Result { match left { - // for now we only have one type, but in the future this is - // going to be very irritating. + Value::I8(x) => match right { + Value::I8(y) => run_op!(operation, x, *y), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), + }, + Value::I16(x) => match right { + Value::I16(y) => run_op!(operation, x, *y), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), + }, + Value::I32(x) => match right { + Value::I32(y) => run_op!(operation, x, *y), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), + }, Value::I64(x) => match right { Value::I64(y) => run_op!(operation, x, *y), - // _ => Err(PrimOpError::TypeMismatch( - // operation.to_string(), - // left.clone(), - // right.clone(), - // )), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), + }, + Value::U8(x) => match right { + Value::U8(y) => run_op!(operation, x, *y), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), + }, + Value::U16(x) => match right { + Value::U16(y) => run_op!(operation, x, *y), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), + }, + Value::U32(x) => match right { + Value::U32(y) => run_op!(operation, x, *y), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), + }, + Value::U64(x) => match right { + Value::U64(y) => run_op!(operation, x, *y), + _ => Err(PrimOpError::TypeMismatch( + operation.to_string(), + left.clone(), + right.clone(), + )), }, } } diff --git a/src/eval/value.rs b/src/eval/value.rs index ba0b0bd..12f3746 100644 --- a/src/eval/value.rs +++ b/src/eval/value.rs @@ -7,19 +7,75 @@ use std::fmt::Display; /// by type so that we don't mix them up. #[derive(Clone, Debug, PartialEq)] pub enum Value { + I8(i8), + I16(i16), + I32(i32), I64(i64), + U8(u8), + U16(u16), + U32(u32), + U64(u64), } impl Display for Value { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Value::I8(x) => write!(f, "{}i8", x), + Value::I16(x) => write!(f, "{}i16", x), + Value::I32(x) => write!(f, "{}i32", x), Value::I64(x) => write!(f, "{}i64", x), + Value::U8(x) => write!(f, "{}u8", x), + Value::U16(x) => write!(f, "{}u16", x), + Value::U32(x) => write!(f, "{}u32", x), + Value::U64(x) => write!(f, "{}u64", x), } } } +impl From for Value { + fn from(value: i8) -> Self { + Value::I8(value) + } +} + +impl From for Value { + fn from(value: i16) -> Self { + Value::I16(value) + } +} + +impl From for Value { + fn from(value: i32) -> Self { + Value::I32(value) + } +} + impl From for Value { fn from(value: i64) -> Self { Value::I64(value) } } + +impl From for Value { + fn from(value: u8) -> Self { + Value::U8(value) + } +} + +impl From for Value { + fn from(value: u16) -> Self { + Value::U16(value) + } +} + +impl From for Value { + fn from(value: u32) -> Self { + Value::U32(value) + } +} + +impl From for Value { + fn from(value: u64) -> Self { + Value::U64(value) + } +} diff --git a/src/ir/ast.rs b/src/ir/ast.rs index 3d8446d..e659b8d 100644 --- a/src/ir/ast.rs +++ b/src/ir/ast.rs @@ -1,4 +1,4 @@ -use crate::syntax::Location; +use crate::syntax::{self, ConstantType, Location}; use internment::ArcIntern; use pretty::{DocAllocator, Pretty}; use proptest::{ @@ -224,14 +224,21 @@ impl From for Expression { } /// A constant in the IR. -#[derive(Debug)] +/// +/// The optional argument in numeric types is the base that was used by the +/// user to input the number. By retaining it, we can ensure that if we need +/// to print the number back out, we can do so in the form that the user +/// entered it. +#[derive(Clone, Debug)] pub enum Value { - /// A numerical constant. - /// - /// The optional argument is the base that was used by the user to input - /// the number. By retaining it, we can ensure that if we need to print the - /// number back out, we can do so in the form that the user entered it. - Number(Option, i64), + I8(Option, i8), + I16(Option, i16), + I32(Option, i32), + I64(Option, i64), + U8(Option, u8), + U16(Option, u16), + U32(Option, u32), + U64(Option, u64), } impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Value @@ -240,19 +247,39 @@ where D: ?Sized + DocAllocator<'a, A>, { fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { - match self { - Value::Number(opt_base, value) => { - let value_str = match opt_base { - None => format!("{}", value), - Some(2) => format!("0b{:b}", value), - Some(8) => format!("0o{:o}", value), - Some(10) => format!("0d{}", value), - Some(16) => format!("0x{:x}", value), - Some(_) => format!("!!{:x}!!", value), - }; + let pretty_internal = |opt_base: &Option, x, t| { + syntax::Value::Number(*opt_base, Some(t), x).pretty(allocator) + }; - allocator.text(value_str) + let pretty_internal_signed = |opt_base, x: i64, t| { + let base = pretty_internal(opt_base, x.unsigned_abs(), t); + + allocator.text("-").append(base) + }; + + match self { + Value::I8(opt_base, value) => { + pretty_internal_signed(opt_base, *value as i64, ConstantType::I8) } + Value::I16(opt_base, value) => { + pretty_internal_signed(opt_base, *value as i64, ConstantType::I16) + } + Value::I32(opt_base, value) => { + pretty_internal_signed(opt_base, *value as i64, ConstantType::I32) + } + Value::I64(opt_base, value) => { + pretty_internal_signed(opt_base, *value, ConstantType::I64) + } + Value::U8(opt_base, value) => { + pretty_internal(opt_base, *value as u64, ConstantType::U8) + } + Value::U16(opt_base, value) => { + pretty_internal(opt_base, *value as u64, ConstantType::U16) + } + Value::U32(opt_base, value) => { + pretty_internal(opt_base, *value as u64, ConstantType::U32) + } + Value::U64(opt_base, value) => pretty_internal(opt_base, *value, ConstantType::U64), } } } diff --git a/src/ir/eval.rs b/src/ir/eval.rs index 78b8b0b..4830ce3 100644 --- a/src/ir/eval.rs +++ b/src/ir/eval.rs @@ -35,7 +35,14 @@ impl Expression { fn eval(&self, env: &EvalEnvironment) -> Result { match self { Expression::Value(_, v) => match v { - super::Value::Number(_, v) => Ok(Value::I64(*v)), + super::Value::I8(_, v) => Ok(Value::I8(*v)), + super::Value::I16(_, v) => Ok(Value::I16(*v)), + super::Value::I32(_, v) => Ok(Value::I32(*v)), + super::Value::I64(_, v) => Ok(Value::I64(*v)), + super::Value::U8(_, v) => Ok(Value::U8(*v)), + super::Value::U16(_, v) => Ok(Value::U16(*v)), + super::Value::U32(_, v) => Ok(Value::U32(*v)), + super::Value::U64(_, v) => Ok(Value::U64(*v)), }, Expression::Reference(_, n) => Ok(env.lookup(n.clone())?), @@ -49,8 +56,8 @@ impl Expression { for arg in args.iter() { match arg { ValueOrRef::Ref(_, n) => arg_values.push(env.lookup(n.clone())?), - ValueOrRef::Value(_, super::Value::Number(_, v)) => { - arg_values.push(Value::I64(*v)) + ValueOrRef::Value(loc, val) => { + arg_values.push(Expression::Value(loc.clone(), val.clone()).eval(env)?) } } } @@ -73,7 +80,7 @@ fn two_plus_three() { let input = crate::syntax::Program::parse(0, "x = 2 + 3; print x;").expect("parse works"); let ir = Program::from(input); let output = ir.eval().expect("runs successfully"); - assert_eq!("x = 5i64\n", &output); + assert_eq!("x = 5u64\n", &output); } #[test] @@ -82,5 +89,5 @@ fn lotsa_math() { crate::syntax::Program::parse(0, "x = 2 + 3 * 10 / 5 - 1; print x;").expect("parse works"); let ir = Program::from(input); let output = ir.eval().expect("runs successfully"); - assert_eq!("x = 7i64\n", &output); + assert_eq!("x = 7u64\n", &output); } diff --git a/src/ir/from_syntax.rs b/src/ir/from_syntax.rs index 98f3ad3..88d0813 100644 --- a/src/ir/from_syntax.rs +++ b/src/ir/from_syntax.rs @@ -149,7 +149,17 @@ impl syntax::Expression { impl From for ir::Value { fn from(value: syntax::Value) -> Self { match value { - syntax::Value::Number(base, val) => ir::Value::Number(base, val), + syntax::Value::Number(base, ty, val) => match ty { + None => ir::Value::U64(base, val), + Some(syntax::ConstantType::I8) => ir::Value::I8(base, val as i8), + Some(syntax::ConstantType::I16) => ir::Value::I16(base, val as i16), + Some(syntax::ConstantType::I32) => ir::Value::I32(base, val as i32), + Some(syntax::ConstantType::I64) => ir::Value::I64(base, val as i64), + Some(syntax::ConstantType::U8) => ir::Value::U8(base, val as u8), + Some(syntax::ConstantType::U16) => ir::Value::U16(base, val as u16), + Some(syntax::ConstantType::U32) => ir::Value::U32(base, val as u32), + Some(syntax::ConstantType::U64) => ir::Value::U64(base, val), + }, } } } diff --git a/src/repl.rs b/src/repl.rs index 5d511b5..d199fbb 100644 --- a/src/repl.rs +++ b/src/repl.rs @@ -1,6 +1,6 @@ use crate::backend::{Backend, BackendError}; use crate::ir::Program as IR; -use crate::syntax::{Location, ParserError, Statement}; +use crate::syntax::{ConstantType, Location, ParserError, Statement}; use codespan_reporting::diagnostic::Diagnostic; use codespan_reporting::files::SimpleFiles; use codespan_reporting::term::{self, Config}; @@ -135,7 +135,8 @@ impl REPL { if let Statement::Binding(_, ref name, _) = syntax { if !self.variable_binding_sites.contains_key(name.as_str()) { self.jitter.define_string(name)?; - self.jitter.define_variable(name.clone())?; + self.jitter + .define_variable(name.clone(), ConstantType::U64)?; } }; diff --git a/src/syntax.rs b/src/syntax.rs index 0ed88ee..689c407 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -299,18 +299,18 @@ fn order_of_operations() { Location::new(testfile, 6), "+".to_string(), vec![ - Expression::Value(Location::new(testfile, 4), Value::Number(None, 1)), + Expression::Value(Location::new(testfile, 4), Value::Number(None, None, 1),), Expression::Primitive( Location::new(testfile, 10), "*".to_string(), vec![ Expression::Value( Location::new(testfile, 8), - Value::Number(None, 2), + Value::Number(None, None, 2), ), Expression::Value( Location::new(testfile, 12), - Value::Number(None, 3), + Value::Number(None, None, 3), ), ] ) diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs index 52f43ab..0d3a9a9 100644 --- a/src/syntax/arbitrary.rs +++ b/src/syntax/arbitrary.rs @@ -1,12 +1,11 @@ -use std::collections::HashSet; - -use crate::syntax::ast::{Expression, Program, Statement, Value}; +use crate::syntax::ast::{ConstantType, Expression, Program, Statement, Value}; use crate::syntax::location::Location; use proptest::sample::select; use proptest::{ prelude::{Arbitrary, BoxedStrategy, Strategy}, strategy::{Just, Union}, }; +use std::collections::HashMap; const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*"; @@ -27,36 +26,38 @@ impl Arbitrary for Program { type Strategy = BoxedStrategy; fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { - let optionals = Vec::>::arbitrary(); + let optionals = Vec::>::arbitrary(); optionals .prop_flat_map(|mut possible_names| { let mut statements = Vec::new(); - let mut defined_variables: HashSet = HashSet::new(); + let mut defined_variables: HashMap = HashMap::new(); for possible_name in possible_names.drain(..) { match possible_name { None if defined_variables.is_empty() => continue, None => statements.push( - Union::new(defined_variables.iter().map(|name| { + Union::new(defined_variables.keys().map(|name| { Just(Statement::Print(Location::manufactured(), name.to_string())) })) .boxed(), ), - Some(new_name) => { + Some((new_name, new_type)) => { let closures_name = new_name.0.clone(); - let retval = - Expression::arbitrary_with(Some(defined_variables.clone())) - .prop_map(move |exp| { - Statement::Binding( - Location::manufactured(), - closures_name.clone(), - exp, - ) - }) - .boxed(); + let retval = Expression::arbitrary_with(( + Some(defined_variables.clone()), + Some(new_type), + )) + .prop_map(move |exp| { + Statement::Binding( + Location::manufactured(), + closures_name.clone(), + exp, + ) + }) + .boxed(); - defined_variables.insert(new_name.0); + defined_variables.insert(new_name.0, new_type); statements.push(retval); } } @@ -70,7 +71,7 @@ impl Arbitrary for Program { } impl Arbitrary for Statement { - type Parameters = Option>; + type Parameters = Option>; type Strategy = BoxedStrategy; fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { @@ -79,7 +80,7 @@ impl Arbitrary for Statement { let binding_strategy = ( VALID_VARIABLE_NAMES, - Expression::arbitrary_with(duplicated_args), + Expression::arbitrary_with((duplicated_args, None)), ) .prop_map(|(name, exp)| Statement::Binding(Location::manufactured(), name, exp)) .boxed(); @@ -89,7 +90,7 @@ impl Arbitrary for Statement { } else { let print_strategy = Union::new( defined_variables - .iter() + .keys() .map(|x| Just(Statement::Print(Location::manufactured(), x.to_string()))), ) .boxed(); @@ -100,20 +101,25 @@ impl Arbitrary for Statement { } impl Arbitrary for Expression { - type Parameters = Option>; + type Parameters = (Option>, Option); type Strategy = BoxedStrategy; - fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { - let defined_variables = args.unwrap_or_default(); + fn arbitrary_with((env, target_type): Self::Parameters) -> Self::Strategy { + let defined_variables = env.unwrap_or_default(); + let mut acceptable_variables = defined_variables + .iter() + .filter(|(_, ctype)| Some(**ctype) == target_type) + .map(|(x, _)| x) + .peekable(); - let value_strategy = Value::arbitrary() + let value_strategy = Value::arbitrary_with(target_type) .prop_map(move |x| Expression::Value(Location::manufactured(), x)) .boxed(); - let leaf_strategy = if defined_variables.is_empty() { + let leaf_strategy = if acceptable_variables.peek().is_none() { value_strategy } else { - let reference_strategy = Union::new(defined_variables.iter().map(|x| { + let reference_strategy = Union::new(acceptable_variables.map(|x| { Just(Expression::Reference( Location::manufactured(), x.to_owned(), @@ -138,10 +144,10 @@ impl Arbitrary for Expression { } impl Arbitrary for Value { - type Parameters = (); + type Parameters = Option; type Strategy = BoxedStrategy; - fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { + fn arbitrary_with(target_type: Self::Parameters) -> Self::Strategy { let base_strategy = Union::new([ Just(None::), Just(Some(2)), @@ -150,10 +156,47 @@ impl Arbitrary for Value { Just(Some(16)), ]); - let value_strategy = i64::arbitrary(); + let type_strategy = if target_type.is_some() { + Just(target_type).boxed() + } else { + proptest::option::of(ConstantType::arbitrary()).boxed() + }; + let value_strategy = u64::arbitrary(); - (base_strategy, value_strategy) - .prop_map(move |(base, value)| Value::Number(base, value)) + (base_strategy, type_strategy, value_strategy) + .prop_map(move |(base, ty, value)| { + let converted_value = match ty { + Some(ConstantType::I8) => value % (i8::MAX as u64), + Some(ConstantType::U8) => value % (u8::MAX as u64), + Some(ConstantType::I16) => value % (i16::MAX as u64), + Some(ConstantType::U16) => value % (u16::MAX as u64), + Some(ConstantType::I32) => value % (i32::MAX as u64), + Some(ConstantType::U32) => value % (u32::MAX as u64), + Some(ConstantType::I64) => value % (i64::MAX as u64), + Some(ConstantType::U64) => value, + None => value, + }; + Value::Number(base, ty, converted_value) + }) .boxed() } } + +impl Arbitrary for ConstantType { + type Parameters = (); + type Strategy = BoxedStrategy; + + fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { + Union::new([ + Just(ConstantType::I8), + Just(ConstantType::I16), + Just(ConstantType::I32), + Just(ConstantType::I64), + Just(ConstantType::U8), + Just(ConstantType::U16), + Just(ConstantType::U32), + Just(ConstantType::U64), + ]) + .boxed() + } +} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index c84e544..5e5792a 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -1,3 +1,4 @@ +pub use crate::syntax::tokens::ConstantType; use crate::syntax::Location; /// The set of valid binary operators. @@ -76,7 +77,7 @@ impl PartialEq for Expression { Expression::Cast(_, t1, e1) => match other { Expression::Cast(_, t2, e2) => t1 == t2 && e1 == e2, _ => false, - } + }, Expression::Primitive(_, prim1, args1) => match other { Expression::Primitive(_, prim2, args2) => prim1 == prim2 && args1 == args2, _ => false, @@ -88,6 +89,12 @@ impl PartialEq for Expression { /// A value from the source syntax #[derive(Clone, Debug, PartialEq, Eq)] pub enum Value { - /// The value of the number, and an optional base that it was written in - Number(Option, i64), + /// The value of the number, an optional base that it was written in, and any + /// type information provided. + /// + /// u64 is chosen because it should be big enough to carry the amount of + /// information we need, and technically we interpret -4 as the primitive unary + /// operation "-" on the number 4. We'll translate this into a type-specific + /// number at a later time. + Number(Option, Option, u64), } diff --git a/src/syntax/eval.rs b/src/syntax/eval.rs index 276b242..9122dd9 100644 --- a/src/syntax/eval.rs +++ b/src/syntax/eval.rs @@ -1,7 +1,7 @@ use internment::ArcIntern; use crate::eval::{EvalEnvironment, EvalError, Value}; -use crate::syntax::{Expression, Program, Statement}; +use crate::syntax::{ConstantType, Expression, Program, Statement}; impl Program { /// Evaluate the program, returning either an error or what it prints out when run. @@ -43,7 +43,18 @@ impl Expression { fn eval(&self, env: &EvalEnvironment) -> Result { match self { Expression::Value(_, v) => match v { - super::Value::Number(_, v) => Ok(Value::I64(*v)), + super::Value::Number(_, ty, v) => match ty { + None => Ok(Value::U64(*v)), + // FIXME: make these types validate their input size + Some(ConstantType::I8) => Ok(Value::I8(*v as i8)), + Some(ConstantType::I16) => Ok(Value::I16(*v as i16)), + Some(ConstantType::I32) => Ok(Value::I32(*v as i32)), + Some(ConstantType::I64) => Ok(Value::I64(*v as i64)), + Some(ConstantType::U8) => Ok(Value::U8(*v as u8)), + Some(ConstantType::U16) => Ok(Value::U16(*v as u16)), + Some(ConstantType::U32) => Ok(Value::U32(*v as u32)), + Some(ConstantType::U64) => Ok(Value::U64(*v)), + }, }, Expression::Reference(_, n) => Ok(env.lookup(ArcIntern::new(n.clone()))?), @@ -68,12 +79,12 @@ impl Expression { fn two_plus_three() { let input = Program::parse(0, "x = 2 + 3; print x;").expect("parse works"); let output = input.eval().expect("runs successfully"); - assert_eq!("x = 5i64\n", &output); + assert_eq!("x = 5u64\n", &output); } #[test] fn lotsa_math() { let input = Program::parse(0, "x = 2 + 3 * 10 / 5 - 1; print x;").expect("parse works"); let output = input.eval().expect("runs successfully"); - assert_eq!("x = 7i64\n", &output); + assert_eq!("x = 7u64\n", &output); } diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 7694a4e..43019c3 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -46,7 +46,7 @@ extern { // to name and use "their value", you get their source location. // For these, we want "their value" to be their actual contents, // which is why we put their types in angle brackets. - "" => Token::Number((>,>,)), + "" => Token::Number((>,>,)), "" => Token::Variable(>), } } @@ -150,20 +150,7 @@ AtomicExpression: Expression = { // just a variable reference "> => Expression::Reference(Location::new(file_idx, l), v.to_string()), // just a number - "> => { - let val = Value::Number(n.0, n.2); - Expression::Value(Location::new(file_idx, l), val) - }, - // a tricky case: also just a number, but using a negative sign. an - // alternative way to do this -- and we may do this eventually -- is - // to implement a unary negation expression. this has the odd effect - // that the user never actually writes down a negative number; they just - // write positive numbers which are immediately sent to a negation - // primitive! - "-" "> => { - let val = Value::Number(n.0, -n.2); - Expression::Value(Location::new(file_idx, l), val) - }, + "> => Expression::Value(Location::new(file_idx, l), Value::Number(n.0, n.1, n.2)), // finally, let people parenthesize expressions and get back to a // lower precedence "(" ")" => e, diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index 4f86613..7d92589 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -1,6 +1,8 @@ use crate::syntax::ast::{Expression, Program, Statement, Value, BINARY_OPERATORS}; use pretty::{DocAllocator, DocBuilder, Pretty}; +use super::ConstantType; + impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program where A: 'a, @@ -50,10 +52,10 @@ where match self { Expression::Value(_, val) => val.pretty(allocator), Expression::Reference(_, var) => allocator.text(var.to_string()), - Expression::Cast(_, t, e) => - allocator.text(t.clone()) - .angles() - .append(e.pretty(allocator)), + Expression::Cast(_, t, e) => allocator + .text(t.clone()) + .angles() + .append(e.pretty(allocator)), Expression::Primitive(_, op, exprs) if BINARY_OPERATORS.contains(&op.as_ref()) => { assert_eq!( exprs.len(), @@ -88,15 +90,14 @@ where { fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> { match self { - Value::Number(opt_base, value) => { - let sign = if *value < 0 { "-" } else { "" }; + Value::Number(opt_base, ty, value) => { let value_str = match opt_base { - None => format!("{}", value), - Some(2) => format!("{}0b{:b}", sign, value.abs()), - Some(8) => format!("{}0o{:o}", sign, value.abs()), - Some(10) => format!("{}0d{}", sign, value.abs()), - Some(16) => format!("{}0x{:x}", sign, value.abs()), - Some(_) => format!("!!{}{:x}!!", sign, value.abs()), + None => format!("{}{}", value, type_suffix(ty)), + Some(2) => format!("0b{:b}{}", value, type_suffix(ty)), + Some(8) => format!("0o{:o}{}", value, type_suffix(ty)), + Some(10) => format!("0d{}{}", value, type_suffix(ty)), + Some(16) => format!("0x{:x}{}", value, type_suffix(ty)), + Some(_) => format!("!!{:x}{}!!", value, type_suffix(ty)), }; allocator.text(value_str) @@ -105,6 +106,20 @@ where } } +fn type_suffix(x: &Option) -> &'static str { + match x { + None => "", + Some(ConstantType::I8) => "i8", + Some(ConstantType::I16) => "i16", + Some(ConstantType::I32) => "i32", + Some(ConstantType::I64) => "i64", + Some(ConstantType::U8) => "u8", + Some(ConstantType::U16) => "u16", + Some(ConstantType::U32) => "u32", + Some(ConstantType::U64) => "u64", + } +} + #[derive(Clone, Copy)] struct CommaSep {} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index edad9ce..2ce31e3 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -66,7 +66,7 @@ pub enum Token { #[regex(r"0d[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(10), v))] #[regex(r"0x[0-9a-fA-F]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(Some(16), v))] #[regex(r"[0-9]+(u8|i8|u16|i16|u32|i32|u64|i64)?", |v| parse_number(None, v))] - Number((Option, Option, i64)), + Number((Option, Option, u64)), // Variables; this is a very standard, simple set of characters // for variables, but feel free to experiment with more complicated @@ -143,16 +143,59 @@ impl Token { } } +#[repr(i64)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ConstantType { - U8, - U16, - U32, - U64, - I8, - I16, - I32, - I64, + U8 = 10, + U16 = 11, + U32 = 12, + U64 = 13, + I8 = 20, + I16 = 21, + I32 = 22, + I64 = 23, +} + +impl From for cranelift_codegen::ir::Type { + fn from(value: ConstantType) -> Self { + match value { + ConstantType::I8 | ConstantType::U8 => cranelift_codegen::ir::types::I8, + ConstantType::I16 | ConstantType::U16 => cranelift_codegen::ir::types::I16, + ConstantType::I32 | ConstantType::U32 => cranelift_codegen::ir::types::I32, + ConstantType::I64 | ConstantType::U64 => cranelift_codegen::ir::types::I64, + } + } +} + +impl ConstantType { + /// Returns true if the given type is (a) numeric and (b) signed; + pub fn is_signed(&self) -> bool { + matches!(self, ConstantType::I8 | ConstantType::I16 | ConstantType::I32 | ConstantType::I64) + } +} + +#[derive(Debug, Error, PartialEq)] +pub enum InvalidConstantType { + #[error("Unrecognized constant {0} for constant type")] + Value(i64), +} + +impl TryFrom for ConstantType { + type Error = InvalidConstantType; + + fn try_from(value: i64) -> Result { + match value { + 10 => Ok(ConstantType::U8), + 11 => Ok(ConstantType::U16), + 12 => Ok(ConstantType::U32), + 13 => Ok(ConstantType::U64), + 20 => Ok(ConstantType::I8), + 21 => Ok(ConstantType::I16), + 22 => Ok(ConstantType::I32), + 23 => Ok(ConstantType::I64), + _ => Err(InvalidConstantType::Value(value)), + } + } } /// Parse a number in the given base, return a pair of the base and the @@ -162,7 +205,7 @@ pub enum ConstantType { fn parse_number( base: Option, value: &Lexer, -) -> Result<(Option, Option, i64), ParseIntError> { +) -> Result<(Option, Option, u64), ParseIntError> { let (radix, strval) = match base { None => (10, value.slice()), Some(radix) => (radix, &value.slice()[2..]), @@ -188,7 +231,7 @@ fn parse_number( (None, strval) }; - let intval = i64::from_str_radix(strval, radix as u32)?; + let intval = u64::from_str_radix(strval, radix as u32)?; Ok((base, declared_type, intval)) }