Files
ngr/src/backend/into_crane.rs
Adam Wick bd3b9af469 🤔 Add a type inference engine, along with typed literals. (#4)
The typed literal formatting mirrors that of Rust. If no type can be
inferred for an untagged literal, the type inference engine will warn
the user and then assume that they meant an unsigned 64-bit number.
(This is slightly inconvenient, because there can be cases in which our
Arbitrary instance may generate a unary negation, in which we should
assume that it's a signed 64-bit number; we may want to revisit this
later.)

The type inference engine is a standard two phase one, in which we first
generate a series of type constraints, and then we solve those
constraints. In this particular implementation, we actually use a third
phase to generate a final AST.

Finally, to increase the amount of testing performed, I've removed the
overflow checking in the evaluator. The only thing we now check for is
division by zero. This does make things a trace slower in testing, but
hopefully we get more coverage this way.
2023-09-19 20:40:05 -07:00

429 lines
21 KiB
Rust

use std::collections::HashMap;
use crate::eval::PrimitiveType;
use crate::ir::{Expression, Primitive, Program, Statement, Type, Value, ValueOrRef};
use crate::syntax::ConstantType;
use cranelift_codegen::entity::EntityRef;
use cranelift_codegen::ir::{
self, entities, types, Function, GlobalValue, InstBuilder, MemFlags, Signature, UserFuncName,
};
use cranelift_codegen::isa::CallConv;
use cranelift_codegen::Context;
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
use cranelift_module::{FuncId, Linkage, Module};
use internment::ArcIntern;
use crate::backend::error::BackendError;
use crate::backend::Backend;
/// When we're compiling, we might need to reference some of the strings built into
/// the source code; to do so, we need a `GlobalValue`. Perhaps unexpectedly, given
/// the name, `GlobalValue`s are specific to a single function we're compiling, so
/// we end up computing this table for every function.
///
/// This is just a handy type alias to avoid a lot of confusion in the functions:
/// it maps each interned string to the `GlobalValue` declared for it in the
/// function currently being compiled.
type StringTable = HashMap<ArcIntern<String>, GlobalValue>;
impl<M: Module> Backend<M> {
/// Compile the given `Program` into a function with the given name.
///
/// At some point, the use of `Program` is going to change; however, for the
/// moment, we have no notion of a function in our language so the whole input
/// is converted into a single output function. The type of the generated
/// function is, essentially, `fn() -> ()`: it takes no arguments and returns
/// no value.
///
/// The function provided can then be either written to a file (if using a
/// static Cranelift backend) or executed directly (if using the Cranelift JIT).
pub fn compile_function(
&mut self,
function_name: &str,
mut program: Program,
) -> Result<FuncId, BackendError> {
let basic_signature = Signature {
params: vec![],
returns: vec![],
call_conv: CallConv::triple_default(&self.platform),
};
// this generates the handle for the function that we'll eventually want to
// return to the user. For now, we declare all functions defined by this
// function as public/global/exported, although we may want to reconsider
// this decision later.
let func_id =
self.module
.declare_function(function_name, Linkage::Export, &basic_signature)?;
// Next we have to generate the compilation context for the rest of this
// function. Currently, we generate a fresh context for every function.
// Since we're only generating one function per `Program`, this makes
// complete sense. However, in the future, we may want to revisit this
// decision.
let mut ctx = Context::new();
let user_func_name = UserFuncName::user(0, func_id.as_u32());
ctx.func = Function::with_name_signature(user_func_name, basic_signature);
// We generate a table of every string that we use in the program, here.
// Cranelift is going to require us to have this in a particular structure
// (`GlobalValue`) so that we can reference them later, and it's going to
// be tricky to generate those on the fly. So we just generate the set we
// need here, and then have ir around in the table for later.
let string_table = self.build_string_table(&mut ctx.func, &program)?;
// In the future, we might want to see what runtime functions the function
// we were given uses, and then only include those functions that we care
// about. Presumably, we'd use some sort of lookup table like we do for
// strings. But for now, we only have one runtime function, and we're pretty
// sure we're always going to use it, so we just declare it (and reference
// it) directly.
let print_func_ref = self.runtime_functions.include_runtime_function(
"print",
&mut self.module,
&mut ctx.func,
)?;
// In the case of the JIT, there may be symbols we've already defined outside
// the context of this particular `Progam`, which we might want to reference.
// Just like with strings, generating the `GlobalValue`s we need can potentially
// be a little tricky to do on the fly, so we generate the complete list right
// here and then use it later.
let pre_defined_symbols: HashMap<String, (GlobalValue, ConstantType)> = self
.defined_symbols
.iter()
.map(|(k, (v, t))| {
let local_data = self.module.declare_data_in_func(*v, &mut ctx.func);
(k.clone(), (local_data, *t))
})
.collect();
// The last table we're going to need is our local variable table, to store
// variables used in this `Program` but not used outside of it. For whatever
// reason, Cranelift requires us to generate unique indexes for each of our
// variables; we just use a simple incrementing counter for that.
let mut variable_table = HashMap::new();
let mut next_var_num = 1;
// Finally (!), we generate the function builder that we're going to use to
// make this function!
let mut fctx = FunctionBuilderContext::new();
let mut builder = FunctionBuilder::new(&mut ctx.func, &mut fctx);
// Make the initial block to put instructions in. Later, when we have control
// flow, we might add more blocks after this one. But, for now, we only have
// the one block.
let main_block = builder.create_block();
builder.switch_to_block(main_block);
// Compiling a function is just compiling each of the statements in order.
// At the moment, we do the pattern match for statements here, and then
// directly compile the statements. If/when we add more statement forms,
// this is likely to become more cumbersome, and we'll want to separate
// these off. But for now, given the amount of tables we keep around to track
// state, it's easier to just include them.
for stmt in program.statements.drain(..) {
match stmt {
// Print statements are fairly easy to compile: we just lookup the
// output buffer, the address of the string to print, and the value
// of whatever variable we're printing. Then we just call print.
Statement::Print(ann, t, var) => {
// Get the output buffer (or null) from our general compilation context.
let buffer_ptr = self.output_buffer_ptr();
let buffer_ptr = builder.ins().iconst(types::I64, buffer_ptr as i64);
// Get a reference to the string we want to print.
let local_name_ref = string_table.get(&var).unwrap();
let name_ptr = builder.ins().symbol_value(types::I64, *local_name_ref);
// Look up the value for the variable. Because this might be a
// global variable (and that requires special logic), we just turn
// this into an `Expression` and re-use the logic in that implementation.
let (val, vtype) = ValueOrRef::Ref(ann, t, var).into_crane(
&mut builder,
&variable_table,
&pre_defined_symbols,
)?;
let vtype_repr = builder.ins().iconst(types::I64, vtype as i64);
let casted_val = match vtype {
ConstantType::U64 | ConstantType::I64 => val,
ConstantType::I8 | ConstantType::I16 | ConstantType::I32 => {
builder.ins().sextend(types::I64, val)
}
ConstantType::U8 | ConstantType::U16 | ConstantType::U32 => {
builder.ins().uextend(types::I64, val)
}
};
// Finally, we can generate the call to print.
builder.ins().call(
print_func_ref,
&[buffer_ptr, name_ptr, vtype_repr, casted_val],
);
}
// Variable binding is a little more con
Statement::Binding(_, var_name, _, value) => {
// Kick off to the `Expression` implementation to see what value we're going
// to bind to this variable.
let (val, etype) =
value.into_crane(&mut builder, &variable_table, &pre_defined_symbols)?;
// Now the question is: is this a local variable, or a global one?
if let Some((global_id, ctype)) = pre_defined_symbols.get(var_name.as_str()) {
// It's a global variable! In this case, we assume that someone has already
// dedicated some space in memory to store this value. We look this location
// up, and then tell Cranelift to store the value there.
assert_eq!(etype, *ctype);
let val_ptr = builder
.ins()
.symbol_value(ir::Type::from(*ctype), *global_id);
builder.ins().store(MemFlags::new(), val, val_ptr, 0);
} else {
// It's a local variable! In this case, we need to allocate a new Cranelift
// `Variable` for this variable, which we do using our `next_var_num` counter.
// (While we're doing this, we also increment `next_var_num`, so that we get
// a fresh `Variable` next time. This is one of those very narrow cases in which
// I wish Rust had an increment expression.)
let var = Variable::new(next_var_num);
next_var_num += 1;
// We can add the variable directly to our local variable map; it's `Copy`.
variable_table.insert(var_name, (var, etype));
// Now we tell Cranelift about our new variable!
builder.declare_var(var, ir::Type::from(etype));
builder.def_var(var, val);
}
}
}
}
// Now that we're done, inject a return function (one with no actual value; basically
// the equivalent of Rust's `return;`). We then seal the block (which lets Cranelift
// know that the block is done), and then finalize the function (which lets Cranelift
// know we're done with the function).
builder.ins().return_(&[]);
builder.seal_block(main_block);
builder.finalize();
// This is a little odd. We want to tell the rest of Cranelift about this function,
// so we register it using the function ID and our builder context. However, the
// result of this function isn't actually super helpful. So we ignore it, unless
// it's an error.
self.module.define_function(func_id, &mut ctx)?;
// done!
Ok(func_id)
}
// Build the string table for use in referencing strings later.
//
// This function is slightly smart, in that it only puts strings in the table that
// are used by the `Program`. (Thanks to `Progam::strings()`!) If the strings have
// been declared globally, via `Backend::define_string()`, we will re-use that data.
// Otherwise, this will define the string for you.
fn build_string_table(
&mut self,
func: &mut Function,
program: &Program,
) -> Result<StringTable, BackendError> {
let mut string_table = HashMap::new();
for interned_value in program.strings().drain() {
let global_id = match self.defined_strings.get(interned_value.as_str()) {
Some(x) => *x,
None => self.define_string(interned_value.as_str())?,
};
let local_data = self.module.declare_data_in_func(global_id, func);
string_table.insert(interned_value, local_data);
}
Ok(string_table)
}
}
impl Expression {
fn into_crane(
self,
builder: &mut FunctionBuilder,
local_variables: &HashMap<ArcIntern<String>, (Variable, ConstantType)>,
global_variables: &HashMap<String, (GlobalValue, ConstantType)>,
) -> Result<(entities::Value, ConstantType), BackendError> {
match self {
Expression::Atomic(x) => x.into_crane(builder, local_variables, global_variables),
Expression::Cast(_, target_type, expr) => {
let (val, val_type) =
expr.into_crane(builder, local_variables, global_variables)?;
match (val_type, &target_type) {
(ConstantType::I8, Type::Primitive(PrimitiveType::I8)) => Ok((val, val_type)),
(ConstantType::I8, Type::Primitive(PrimitiveType::I16)) => {
Ok((builder.ins().sextend(types::I16, val), ConstantType::I16))
}
(ConstantType::I8, Type::Primitive(PrimitiveType::I32)) => {
Ok((builder.ins().sextend(types::I32, val), ConstantType::I32))
}
(ConstantType::I8, Type::Primitive(PrimitiveType::I64)) => {
Ok((builder.ins().sextend(types::I64, val), ConstantType::I64))
}
(ConstantType::I16, Type::Primitive(PrimitiveType::I16)) => Ok((val, val_type)),
(ConstantType::I16, Type::Primitive(PrimitiveType::I32)) => {
Ok((builder.ins().sextend(types::I32, val), ConstantType::I32))
}
(ConstantType::I16, Type::Primitive(PrimitiveType::I64)) => {
Ok((builder.ins().sextend(types::I64, val), ConstantType::I64))
}
(ConstantType::I32, Type::Primitive(PrimitiveType::I32)) => Ok((val, val_type)),
(ConstantType::I32, Type::Primitive(PrimitiveType::I64)) => {
Ok((builder.ins().sextend(types::I64, val), ConstantType::I64))
}
(ConstantType::I64, Type::Primitive(PrimitiveType::I64)) => Ok((val, val_type)),
(ConstantType::U8, Type::Primitive(PrimitiveType::U8)) => Ok((val, val_type)),
(ConstantType::U8, Type::Primitive(PrimitiveType::U16)) => {
Ok((builder.ins().uextend(types::I16, val), ConstantType::U16))
}
(ConstantType::U8, Type::Primitive(PrimitiveType::U32)) => {
Ok((builder.ins().uextend(types::I32, val), ConstantType::U32))
}
(ConstantType::U8, Type::Primitive(PrimitiveType::U64)) => {
Ok((builder.ins().uextend(types::I64, val), ConstantType::U64))
}
(ConstantType::U16, Type::Primitive(PrimitiveType::U16)) => Ok((val, val_type)),
(ConstantType::U16, Type::Primitive(PrimitiveType::U32)) => {
Ok((builder.ins().uextend(types::I32, val), ConstantType::U32))
}
(ConstantType::U16, Type::Primitive(PrimitiveType::U64)) => {
Ok((builder.ins().uextend(types::I64, val), ConstantType::U64))
}
(ConstantType::U32, Type::Primitive(PrimitiveType::U32)) => Ok((val, val_type)),
(ConstantType::U32, Type::Primitive(PrimitiveType::U64)) => {
Ok((builder.ins().uextend(types::I64, val), ConstantType::U64))
}
(ConstantType::U64, Type::Primitive(PrimitiveType::U64)) => Ok((val, val_type)),
_ => Err(BackendError::InvalidTypeCast {
from: val_type.into(),
to: target_type,
}),
}
}
Expression::Primitive(_, _, prim, mut vals) => {
let mut values = vec![];
let mut first_type = None;
for val in vals.drain(..) {
let (compiled, compiled_type) =
val.into_crane(builder, local_variables, global_variables)?;
if let Some(leftmost_type) = first_type {
assert_eq!(leftmost_type, compiled_type);
} else {
first_type = Some(compiled_type);
}
values.push(compiled);
}
let first_type = first_type.expect("primitive op has at least one argument");
// then we just need to tell Cranelift how to do each of our primitives! Much
// like Statements, above, we probably want to eventually shuffle this off into
// a separate function (maybe something off `Primitive`), but for now it's simple
// enough that we just do the `match` here.
match prim {
Primitive::Plus => Ok((builder.ins().iadd(values[0], values[1]), first_type)),
Primitive::Minus if values.len() == 2 => {
Ok((builder.ins().isub(values[0], values[1]), first_type))
}
Primitive::Minus => Ok((builder.ins().ineg(values[0]), first_type)),
Primitive::Times => Ok((builder.ins().imul(values[0], values[1]), first_type)),
Primitive::Divide if first_type.is_signed() => {
Ok((builder.ins().sdiv(values[0], values[1]), first_type))
}
Primitive::Divide => Ok((builder.ins().udiv(values[0], values[1]), first_type)),
}
}
}
}
}
// Lowering for the atomic pieces of an expression: literal values become `iconst`
// instructions, and variable references are resolved first against the local
// variable table and then against the table of pre-defined globals.
impl ValueOrRef {
    /// Lower this value or variable reference into Cranelift instructions.
    ///
    /// Instructions are emitted through `builder`. On success, the result is the
    /// Cranelift value together with the `ConstantType` it carries. Unsigned
    /// literals use the Cranelift integer type of the same width as their signed
    /// counterparts; only the returned `ConstantType` records the signedness.
    ///
    /// # Errors
    ///
    /// Returns `BackendError::VariableLookupFailure` if a referenced name is in
    /// neither `local_variables` nor `global_variables`.
    fn into_crane(
        self,
        builder: &mut FunctionBuilder,
        local_variables: &HashMap<ArcIntern<String>, (Variable, ConstantType)>,
        global_variables: &HashMap<String, (GlobalValue, ConstantType)>,
    ) -> Result<(entities::Value, ConstantType), BackendError> {
        match self {
            // Values are pretty straightforward to compile: every one of them is an
            // integer constant of some width.
            ValueOrRef::Value(_, _, val) => match val {
                Value::I8(_, v) => {
                    Ok((builder.ins().iconst(types::I8, v as i64), ConstantType::I8))
                }
                Value::I16(_, v) => Ok((
                    builder.ins().iconst(types::I16, v as i64),
                    ConstantType::I16,
                )),
                Value::I32(_, v) => Ok((
                    builder.ins().iconst(types::I32, v as i64),
                    ConstantType::I32,
                )),
                Value::I64(_, v) => Ok((builder.ins().iconst(types::I64, v), ConstantType::I64)),
                Value::U8(_, v) => {
                    Ok((builder.ins().iconst(types::I8, v as i64), ConstantType::U8))
                }
                Value::U16(_, v) => Ok((
                    builder.ins().iconst(types::I16, v as i64),
                    ConstantType::U16,
                )),
                Value::U32(_, v) => Ok((
                    builder.ins().iconst(types::I32, v as i64),
                    ConstantType::U32,
                )),
                Value::U64(_, v) => Ok((
                    builder.ins().iconst(types::I64, v as i64),
                    ConstantType::U64,
                )),
            },

            ValueOrRef::Ref(_, _, name) => {
                // first we see if this is a local variable (which is nicer, from an
                // optimization point of view.)
                if let Some((local_var, etype)) = local_variables.get(&name) {
                    return Ok((builder.use_var(*local_var), *etype));
                }

                // then we check to see if this is a global reference, which requires us to
                // first lookup where the value is stored, and then load it.
                if let Some((global_var, etype)) = global_variables.get(name.as_ref()) {
                    let cranelift_type = ir::Type::from(*etype);

                    // `symbol_value` produces the *address* of the global, so its result
                    // has to be pointer-typed (`I64`, matching how the rest of this
                    // backend materializes addresses); only the `load` below uses the
                    // type of the stored value.
                    let val_ptr = builder.ins().symbol_value(types::I64, *global_var);

                    return Ok((
                        builder
                            .ins()
                            .load(cranelift_type, MemFlags::new(), val_ptr, 0),
                        *etype,
                    ));
                }

                // this should never happen, because we should have made sure that there are
                // no unbound variables a long time before this. but still ...
                Err(BackendError::VariableLookupFailure(name))
            }
        }
    }
}