📜 Add better documentation across the compiler. (#3)
These changes pay particular attention to API endpoints, to try to ensure that any rustdocs generated are detailed and sensible. A good next step, eventually, might be to include doctest examples, as well. For the moment, it's not clear that they would provide a lot of value, though. In addition, this does a couple of refactors to simplify the code base in ways that make things clearer or, at least, briefer.
This commit is contained in:
@@ -8,15 +8,31 @@ use cranelift_codegen::ir::{
|
||||
use cranelift_codegen::isa::CallConv;
|
||||
use cranelift_codegen::Context;
|
||||
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext, Variable};
|
||||
use cranelift_module::{FuncId, Linkage, Module, ModuleError};
|
||||
use cranelift_module::{FuncId, Linkage, Module};
|
||||
use internment::ArcIntern;
|
||||
|
||||
use crate::backend::error::BackendError;
|
||||
use crate::backend::Backend;
|
||||
|
||||
/// When we're compiling, we might need to reference some of the strings built into
|
||||
/// the source code; to do so, we need a `GlobalValue`. Perhaps unexpectedly, given
|
||||
/// the name, `GlobalValue`s are specific to a single function we're compiling, so
|
||||
/// we end up computing this table for every function.
|
||||
///
|
||||
/// This is just a handy type alias to avoid a lot of confusion in the functions.
|
||||
type StringTable = HashMap<ArcIntern<String>, GlobalValue>;
|
||||
|
||||
impl<M: Module> Backend<M> {
|
||||
/// Compile the given `Program` into a function with the given name.
|
||||
///
|
||||
/// At some point, the use of `Program` is going to change; however, for the
|
||||
/// moment, we have no notion of a function in our language so the whole input
|
||||
/// is converted into a single output function. The type of the generated
|
||||
/// function is, essentially, `fn() -> ()`: it takes no arguments and returns
|
||||
/// no value.
|
||||
///
|
||||
/// The function provided can then be either written to a file (if using a
|
||||
/// static Cranelift backend) or executed directly (if using the Cranelift JIT).
|
||||
pub fn compile_function(
|
||||
&mut self,
|
||||
function_name: &str,
|
||||
@@ -28,21 +44,47 @@ impl<M: Module> Backend<M> {
|
||||
call_conv: CallConv::SystemV,
|
||||
};
|
||||
|
||||
// this generates the handle for the function that we'll eventually want to
|
||||
// return to the user. For now, we declare all functions defined by this
|
||||
// function as public/global/exported, although we may want to reconsider
|
||||
// this decision later.
|
||||
let func_id =
|
||||
self.module
|
||||
.declare_function(function_name, Linkage::Export, &basic_signature)?;
|
||||
let mut ctx = Context::new();
|
||||
ctx.func =
|
||||
Function::with_name_signature(UserFuncName::user(0, func_id.as_u32()), basic_signature);
|
||||
|
||||
// Next we have to generate the compilation context for the rest of this
|
||||
// function. Currently, we generate a fresh context for every function.
|
||||
// Since we're only generating one function per `Program`, this makes
|
||||
// complete sense. However, in the future, we may want to revisit this
|
||||
// decision.
|
||||
let mut ctx = Context::new();
|
||||
let user_func_name = UserFuncName::user(0, func_id.as_u32());
|
||||
ctx.func = Function::with_name_signature(user_func_name, basic_signature);
|
||||
|
||||
// We generate a table of every string that we use in the program, here.
|
||||
// Cranelift is going to require us to have this in a particular structure
|
||||
// (`GlobalValue`) so that we can reference them later, and it's going to
|
||||
// be tricky to generate those on the fly. So we just generate the set we
|
||||
// need here, and then have it around in the table for later.
|
||||
let string_table = self.build_string_table(&mut ctx.func, &program)?;
|
||||
let mut variable_table = HashMap::new();
|
||||
let mut next_var_num = 1;
|
||||
|
||||
// In the future, we might want to see what runtime functions the function
|
||||
// we were given uses, and then only include those functions that we care
|
||||
// about. Presumably, we'd use some sort of lookup table like we do for
|
||||
// strings. But for now, we only have one runtime function, and we're pretty
|
||||
// sure we're always going to use it, so we just declare it (and reference
|
||||
// it) directly.
|
||||
let print_func_ref = self.runtime_functions.include_runtime_function(
|
||||
"print",
|
||||
&mut self.module,
|
||||
&mut ctx.func,
|
||||
)?;
|
||||
|
||||
// In the case of the JIT, there may be symbols we've already defined outside
|
||||
// the context of this particular `Program`, which we might want to reference.
|
||||
// Just like with strings, generating the `GlobalValue`s we need can potentially
|
||||
// be a little tricky to do on the fly, so we generate the complete list right
|
||||
// here and then use it later.
|
||||
let pre_defined_symbols: HashMap<String, GlobalValue> = self
|
||||
.defined_symbols
|
||||
.iter()
|
||||
@@ -52,67 +94,88 @@ impl<M: Module> Backend<M> {
|
||||
})
|
||||
.collect();
|
||||
|
||||
// The last table we're going to need is our local variable table, to store
|
||||
// variables used in this `Program` but not used outside of it. For whatever
|
||||
// reason, Cranelift requires us to generate unique indexes for each of our
|
||||
// variables; we just use a simple incrementing counter for that.
|
||||
let mut variable_table = HashMap::new();
|
||||
let mut next_var_num = 1;
|
||||
|
||||
// Finally (!), we generate the function builder that we're going to use to
|
||||
// make this function!
|
||||
let mut fctx = FunctionBuilderContext::new();
|
||||
let mut builder = FunctionBuilder::new(&mut ctx.func, &mut fctx);
|
||||
|
||||
// Make the initial block to put instructions in. Later, when we have control
|
||||
// flow, we might add more blocks after this one. But, for now, we only have
|
||||
// the one block.
|
||||
let main_block = builder.create_block();
|
||||
builder.switch_to_block(main_block);
|
||||
|
||||
// Compiling a function is just compiling each of the statements in order.
|
||||
// At the moment, we do the pattern match for statements here, and then
|
||||
// directly compile the statements. If/when we add more statement forms,
|
||||
// this is likely to become more cumbersome, and we'll want to separate
|
||||
// these off. But for now, given the number of tables we keep around to track
|
||||
// state, it's easier to just include them.
|
||||
for stmt in program.statements.drain(..) {
|
||||
match stmt {
|
||||
// Print statements are fairly easy to compile: we just lookup the
|
||||
// output buffer, the address of the string to print, and the value
|
||||
// of whatever variable we're printing. Then we just call print.
|
||||
Statement::Print(ann, var) => {
|
||||
// Get the output buffer (or null) from our general compilation context.
|
||||
let buffer_ptr = self.output_buffer_ptr();
|
||||
let buffer_ptr = builder.ins().iconst(types::I64, buffer_ptr as i64);
|
||||
|
||||
// Get a reference to the string we want to print.
|
||||
let local_name_ref = string_table.get(&var).unwrap();
|
||||
let name_ptr = builder.ins().symbol_value(types::I64, *local_name_ref);
|
||||
let val = ValueOrRef::Ref(ann, var).into_cranelift(
|
||||
|
||||
// Look up the value for the variable. Because this might be a
|
||||
// global variable (and that requires special logic), we just turn
|
||||
// this into an `Expression` and re-use the logic in that implementation.
|
||||
let val = Expression::Reference(ann, var).into_crane(
|
||||
&mut builder,
|
||||
&variable_table,
|
||||
&pre_defined_symbols,
|
||||
)?;
|
||||
|
||||
// Finally, we can generate the call to print.
|
||||
builder
|
||||
.ins()
|
||||
.call(print_func_ref, &[buffer_ptr, name_ptr, val]);
|
||||
}
|
||||
|
||||
// Variable binding is a little more complicated, because the target may be either a global or a local variable.
|
||||
Statement::Binding(_, var_name, value) => {
|
||||
let val = match value {
|
||||
Expression::Value(_, Value::Number(_, v)) => {
|
||||
builder.ins().iconst(types::I64, v)
|
||||
}
|
||||
|
||||
Expression::Reference(_, name) => {
|
||||
let value_var_num = variable_table.get(&name).unwrap();
|
||||
builder.use_var(Variable::new(*value_var_num))
|
||||
}
|
||||
|
||||
Expression::Primitive(_, prim, mut vals) => {
|
||||
let right = vals.pop().unwrap().into_cranelift(
|
||||
&mut builder,
|
||||
&variable_table,
|
||||
&pre_defined_symbols,
|
||||
)?;
|
||||
let left = vals.pop().unwrap().into_cranelift(
|
||||
&mut builder,
|
||||
&variable_table,
|
||||
&pre_defined_symbols,
|
||||
)?;
|
||||
|
||||
match prim {
|
||||
Primitive::Plus => builder.ins().iadd(left, right),
|
||||
Primitive::Minus => builder.ins().isub(left, right),
|
||||
Primitive::Times => builder.ins().imul(left, right),
|
||||
Primitive::Divide => builder.ins().sdiv(left, right),
|
||||
}
|
||||
}
|
||||
};
|
||||
// Kick off to the `Expression` implementation to see what value we're going
|
||||
// to bind to this variable.
|
||||
let val =
|
||||
value.into_crane(&mut builder, &variable_table, &pre_defined_symbols)?;
|
||||
|
||||
// Now the question is: is this a local variable, or a global one?
|
||||
if let Some(global_id) = pre_defined_symbols.get(var_name.as_str()) {
|
||||
// It's a global variable! In this case, we assume that someone has already
|
||||
// dedicated some space in memory to store this value. We look this location
|
||||
// up, and then tell Cranelift to store the value there.
|
||||
let val_ptr = builder.ins().symbol_value(types::I64, *global_id);
|
||||
builder.ins().store(MemFlags::new(), val, val_ptr, 0);
|
||||
} else {
|
||||
// It's a local variable! In this case, we need to allocate a new Cranelift
|
||||
// `Variable` for this variable, which we do using our `next_var_num` counter.
|
||||
// (While we're doing this, we also increment `next_var_num`, so that we get
|
||||
// a fresh `Variable` next time. This is one of those very narrow cases in which
|
||||
// I wish Rust had an increment expression.)
|
||||
let var = Variable::new(next_var_num);
|
||||
variable_table.insert(var_name, next_var_num);
|
||||
next_var_num += 1;
|
||||
|
||||
// We can add the variable directly to our local variable map; it's `Copy`.
|
||||
variable_table.insert(var_name, var);
|
||||
|
||||
// Now we tell Cranelift about our new variable, which has type I64 because
|
||||
// everything we have at this point is of type I64. Once it's declared, we
|
||||
// define it as having the value we computed above.
|
||||
builder.declare_var(var, types::I64);
|
||||
builder.def_var(var, val);
|
||||
}
|
||||
@@ -120,15 +183,30 @@ impl<M: Module> Backend<M> {
|
||||
}
|
||||
}
|
||||
|
||||
// Now that we're done, inject a return instruction (one with no actual value; basically
|
||||
// the equivalent of Rust's `return;`). We then seal the block (which lets Cranelift
|
||||
// know that the block is done), and then finalize the function (which lets Cranelift
|
||||
// know we're done with the function).
|
||||
builder.ins().return_(&[]);
|
||||
builder.seal_block(main_block);
|
||||
builder.finalize();
|
||||
|
||||
// This is a little odd. We want to tell the rest of Cranelift about this function,
|
||||
// so we register it using the function ID and our builder context. However, the
|
||||
// result of this function isn't actually super helpful. So we ignore it, unless
|
||||
// it's an error.
|
||||
let _ = self.module.define_function(func_id, &mut ctx)?;
|
||||
|
||||
// done!
|
||||
Ok(func_id)
|
||||
}
|
||||
|
||||
// Build the string table for use in referencing strings later.
|
||||
//
|
||||
// This function is slightly smart, in that it only puts strings in the table that
|
||||
// are used by the `Program`. (Thanks to `Program::strings()`!) If the strings have
|
||||
// been declared globally, via `Backend::define_string()`, we will re-use that data.
|
||||
// Otherwise, this will define the string for you.
|
||||
fn build_string_table(
|
||||
&mut self,
|
||||
func: &mut Function,
|
||||
@@ -149,30 +227,73 @@ impl<M: Module> Backend<M> {
|
||||
}
|
||||
}
|
||||
|
||||
impl ValueOrRef {
|
||||
fn into_cranelift(
|
||||
impl Expression {
|
||||
fn into_crane(
|
||||
self,
|
||||
builder: &mut FunctionBuilder,
|
||||
local_variables: &HashMap<ArcIntern<String>, usize>,
|
||||
local_variables: &HashMap<ArcIntern<String>, Variable>,
|
||||
global_variables: &HashMap<String, GlobalValue>,
|
||||
) -> Result<entities::Value, ModuleError> {
|
||||
) -> Result<entities::Value, BackendError> {
|
||||
match self {
|
||||
ValueOrRef::Value(_, value) => match value {
|
||||
Value::Number(_base, numval) => Ok(builder.ins().iconst(types::I64, numval)),
|
||||
},
|
||||
// Values are pretty straightforward to compile, mostly because we only
|
||||
// have one type of variable, and it's an integer type.
|
||||
Expression::Value(_, Value::Number(_, v)) => Ok(builder.ins().iconst(types::I64, v)),
|
||||
|
||||
ValueOrRef::Ref(_, name) => {
|
||||
if let Some(local_num) = local_variables.get(&name) {
|
||||
return Ok(builder.use_var(Variable::new(*local_num)));
|
||||
Expression::Reference(_, name) => {
|
||||
// first we see if this is a local variable (which is nicer, from an
|
||||
// optimization point of view.)
|
||||
if let Some(local_var) = local_variables.get(&name) {
|
||||
return Ok(builder.use_var(*local_var));
|
||||
}
|
||||
|
||||
if let Some(global_id) = global_variables.get(name.as_str()) {
|
||||
let val_ptr = builder.ins().symbol_value(types::I64, *global_id);
|
||||
// then we check to see if this is a global reference, which requires us to
|
||||
// first lookup where the value is stored, and then load it.
|
||||
if let Some(global_var) = global_variables.get(name.as_ref()) {
|
||||
let val_ptr = builder.ins().symbol_value(types::I64, *global_var);
|
||||
return Ok(builder.ins().load(types::I64, MemFlags::new(), val_ptr, 0));
|
||||
}
|
||||
|
||||
Err(ModuleError::Undeclared(name.to_string()))
|
||||
// this should never happen, because we should have made sure that there are
|
||||
// no unbound variables a long time before this. but still ...
|
||||
Err(BackendError::VariableLookupFailure(name))
|
||||
}
|
||||
|
||||
Expression::Primitive(_, prim, mut vals) => {
|
||||
// we're going to use `pop`, so we're going to pull and compile the right value ...
|
||||
let right =
|
||||
vals.pop()
|
||||
.unwrap()
|
||||
.into_crane(builder, local_variables, global_variables)?;
|
||||
// ... and then the left.
|
||||
let left =
|
||||
vals.pop()
|
||||
.unwrap()
|
||||
.into_crane(builder, local_variables, global_variables)?;
|
||||
|
||||
// then we just need to tell Cranelift how to do each of our primitives! Much
|
||||
// like Statements, above, we probably want to eventually shuffle this off into
|
||||
// a separate function (maybe something off `Primitive`), but for now it's simple
|
||||
// enough that we just do the `match` here.
|
||||
match prim {
|
||||
Primitive::Plus => Ok(builder.ins().iadd(left, right)),
|
||||
Primitive::Minus => Ok(builder.ins().isub(left, right)),
|
||||
Primitive::Times => Ok(builder.ins().imul(left, right)),
|
||||
Primitive::Divide => Ok(builder.ins().sdiv(left, right)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Just to avoid duplication, this just leverages the `From<ValueOrRef>` trait implementation
|
||||
// for `Expression` to compile this via the `Expression` logic, above.
|
||||
impl ValueOrRef {
|
||||
/// Compile this value or reference into a Cranelift value.
///
/// This is a thin wrapper: `self` is converted with `Expression::from` and the
/// result is compiled by `Expression::into_crane`, with `builder`,
/// `local_variables` and `global_variables` passed through unchanged.
///
/// Returns whatever `Expression::into_crane` returns, including any
/// `BackendError` it reports.
fn into_crane(
|
||||
self,
|
||||
builder: &mut FunctionBuilder,
|
||||
local_variables: &HashMap<ArcIntern<String>, Variable>,
|
||||
global_variables: &HashMap<String, GlobalValue>,
|
||||
) -> Result<entities::Value, BackendError> {
|
||||
Expression::from(self).into_crane(builder, local_variables, global_variables)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user