Files
ngr/src/syntax/validate.rs

356 lines
15 KiB
Rust

use crate::eval::PrimitiveType;
use crate::syntax::{Expression, Location, Program, StructureDefinition, TopLevel};
use crate::util::scoped_map::ScopedMap;
use crate::util::warning_result::WarningResult;
use codespan_reporting::diagnostic::Diagnostic;
use std::collections::HashMap;
use std::str::FromStr;
use super::{FunctionDefinition, Name, Type};
/// A fatal problem discovered while validating the input program.
///
/// Any of these means the program is broken in a way we cannot work
/// around, so compilation should stop. To show one to the user, convert
/// it into a [`Diagnostic`] and render it with [`codespan_reporting`].
#[derive(Debug)]
pub enum Error {
    /// A variable was referenced without being bound. Carries the location
    /// of the reference and the name that was looked up.
    UnboundVariable(Location, String),
    /// A type name was used that we do not recognize. Carries the location
    /// of the use and the offending type name.
    UnknownType(Location, String),
}
impl From<Error> for Diagnostic<usize> {
fn from(x: Error) -> Self {
match &x {
Error::UnboundVariable(location, name) => location
.labelled_error("unbound here")
.with_message(format!("Unbound variable '{}'", name)),
Error::UnknownType(location, name) => location
.labelled_error("type referenced here")
.with_message(format!("Unknown type '{}'", name)),
}
}
}
/// A non-critical problem discovered while validating the input program.
///
/// The user may well want to act on these, but validation (and compilation)
/// can carry on regardless. To show one to the user, convert it into a
/// [`Diagnostic`] via [`From`] and render it with [`codespan_reporting`].
#[derive(Debug, PartialEq, Eq)]
pub enum Warning {
    /// A name was bound again while an earlier binding was still visible.
    /// Carries, in order: the original binding site, the site of the new
    /// binding, and the variable's name.
    ShadowedVariable(Location, Location, String),
}
impl From<Warning> for Diagnostic<usize> {
    /// Render a validation warning as a warning-level diagnostic.
    fn from(x: Warning) -> Self {
        match &x {
            Warning::ShadowedVariable(original, new, name) => {
                // The rebinding is the primary site; the first binding is
                // shown as supporting context.
                let rebinding = new.primary_label().with_message("variable rebound here");
                let first_binding = original
                    .secondary_label()
                    .with_message("original binding site");
                let summary = format!("Variable '{}' is rebound", name);

                Diagnostic::warning()
                    .with_labels(vec![rebinding, first_binding])
                    .with_message(summary)
            }
        }
    }
}
impl Program {
    /// Validate that the program makes semantic sense, not just syntactic sense.
    ///
    /// This catches problems such as references to variables that were never
    /// bound, and produces warnings for things that are inadvisable but not
    /// actually wrong. Validation starts with no variables in scope; use
    /// [`Program::validate_with_bindings`] to supply an existing scope.
    pub fn validate(raw_syntax: Vec<TopLevel>) -> WarningResult<Program, Warning, Error> {
        Self::validate_with_bindings(raw_syntax, &mut ScopedMap::new())
    }

    /// Validate the program against a caller-supplied set of bound variables.
    ///
    /// Performs the same checks as [`Program::validate`], but names already
    /// present in `bound_variables` count as bound. The map is handed to the
    /// validation of every top-level expression, so it may be modified along
    /// the way.
    ///
    /// Top-level structure definitions are collected into the resulting
    /// program's structure table. Top-level expressions are validated in
    /// order and become the statements of the program's body block.
    pub fn validate_with_bindings(
        raw_syntax: Vec<TopLevel>,
        bound_variables: &mut ScopedMap<String, Location>,
    ) -> WarningResult<Program, Warning, Error> {
        // Work out the location for the body block before the input is consumed.
        let location = Location::infer_from(&raw_syntax);

        let mut structures = HashMap::new();
        let mut functions = HashMap::new();
        let mut validated = WarningResult::ok(vec![]);

        for item in raw_syntax {
            match item {
                TopLevel::Structure(loc, name, fields) => {
                    // Fields declared at the top level always carry a type.
                    let definition = StructureDefinition::new(
                        loc,
                        name.clone(),
                        fields.into_iter().map(|(n, t)| (n, Some(t))).collect(),
                    );
                    structures.insert(name, definition);
                }

                TopLevel::Expression(expr) => {
                    let checked = expr.validate(bound_variables, &mut structures, &mut functions);
                    validated = validated.merge_with(checked, |mut so_far, next| {
                        so_far.push(next);
                        Ok(so_far)
                    });
                }
            }
        }

        validated.map(move |exprs| Program {
            functions,
            structures,
            body: Expression::Block(location, exprs),
        })
    }
}
impl Expression {
    /// Validate a single expression, returning the (possibly rewritten)
    /// expression together with any warnings and errors found on the way.
    ///
    /// * `variable_map` holds the names currently in scope, keyed by their
    ///   original name and mapped to their binding site. `Binding` and
    ///   `Function` expressions add to it.
    /// * `structure_map` receives the closure structure generated for each
    ///   `Function` expression.
    /// * `function_map` receives the function definition generated for each
    ///   `Function` expression.
    ///
    /// A `Function` expression does not survive validation as-is: its
    /// definition is moved into `function_map` and the expression is replaced
    /// by a call to the `<closure>` primitive.
    fn validate(
        self,
        variable_map: &mut ScopedMap<String, Location>,
        structure_map: &mut HashMap<Name, StructureDefinition>,
        function_map: &mut HashMap<Name, FunctionDefinition>,
    ) -> WarningResult<Expression, Warning, Error> {
        match self {
            Expression::Value(_, _) => WarningResult::ok(self),

            Expression::Constructor(location, name, fields) => {
                let mut result = WarningResult::ok(vec![]);

                for (field_name, expr) in fields {
                    let expr_result = expr.validate(variable_map, structure_map, function_map);
                    result = result.merge_with(expr_result, move |mut fields, new_expr| {
                        fields.push((field_name, new_expr));
                        Ok(fields)
                    });
                }

                result.map(move |fields| Expression::Constructor(location, name, fields))
            }

            Expression::Reference(ref var)
                if variable_map.contains_key(&var.original_name().to_string()) =>
            {
                WarningResult::ok(self)
            }

            // Any reference that did not match the guard above is unbound.
            Expression::Reference(var) => WarningResult::err(Error::UnboundVariable(
                var.location().clone(),
                var.original_name().to_string(),
            )),

            Expression::FieldRef(location, exp, field) => exp
                .validate(variable_map, structure_map, function_map)
                .map(|x| Expression::FieldRef(location, Box::new(x), field)),

            Expression::Cast(location, t, expr) => {
                let mut expr_result = expr.validate(variable_map, structure_map, function_map);

                // The cast target must parse as a primitive type.
                if PrimitiveType::from_str(&t).is_err() {
                    expr_result.add_error(Error::UnknownType(location.clone(), t.clone()));
                }

                expr_result.map(|e| Expression::Cast(location, t, Box::new(e)))
            }

            // FIXME: Check for valid primitives here!!
            Expression::Primitive(_, _) => WarningResult::ok(self),

            Expression::Call(loc, func, args) => {
                let mut result = func
                    .validate(variable_map, structure_map, function_map)
                    .map(|x| (x, vec![]));

                for arg in args {
                    let expr_result = arg.validate(variable_map, structure_map, function_map);
                    result =
                        result.merge_with(expr_result, |(func, mut previous_args), new_arg| {
                            previous_args.push(new_arg);
                            Ok((func, previous_args))
                        });
                }

                result.map(|(func, args)| Expression::Call(loc, Box::new(func), args))
            }

            Expression::Block(loc, stmts) => {
                let mut result = WarningResult::ok(vec![]);

                for stmt in stmts {
                    let stmt_result = stmt.validate(variable_map, structure_map, function_map);
                    result = result.merge_with(stmt_result, |mut stmts, stmt| {
                        stmts.push(stmt);
                        Ok(stmts)
                    });
                }

                result.map(|stmts| Expression::Block(loc, stmts))
            }

            Expression::Binding(loc, var, val) => {
                // we're going to make the decision that a variable is not bound in the right
                // hand side of its binding, which makes a lot of things easier. So we'll just
                // immediately check the expression, and go from there.
                let mut result = val.validate(variable_map, structure_map, function_map);

                // Every other lookup and insert in this file keys the scope on
                // the variable's original name, so use that same key for the
                // lookup, the insert, and the name reported in the warning.
                let bound_name = var.original_name().to_string();

                match variable_map.get(&bound_name).cloned() {
                    Some(original_binding_site) => {
                        // Already bound: warn, and leave the recorded binding
                        // site as it was.
                        result.add_warning(Warning::ShadowedVariable(
                            original_binding_site,
                            loc.clone(),
                            bound_name,
                        ));
                    }
                    None => {
                        variable_map.insert(bound_name, loc.clone());
                    }
                }

                result.map(|val| Expression::Binding(loc, var, Box::new(val)))
            }

            Expression::Function(loc, name, mut arguments, return_type, body) => {
                let mut result = WarningResult::ok(());

                // first we should check for shadowing, both by the function's own
                // name (if it has one) and by each of its arguments
                for new_name in name.iter().chain(arguments.iter().map(|x| &x.0)) {
                    if let Some(original_site) = variable_map.get(new_name.original_name()) {
                        result.add_warning(Warning::ShadowedVariable(
                            original_site.clone(),
                            loc.clone(),
                            new_name.original_name().to_string(),
                        ));
                    }
                }

                // the function name is now available in our current scope, if the function was
                // given one
                if let Some(name) = &name {
                    variable_map.insert(name.original_name().to_string(), name.location().clone());
                }

                // the arguments are available in a new scope, which we will use to validate the
                // function body
                variable_map.new_scope();
                for (arg, _) in arguments.iter() {
                    variable_map.insert(arg.original_name().to_string(), arg.location().clone());
                }
                let body_result = body.validate(variable_map, structure_map, function_map);
                variable_map.release_scope();

                body_result.merge_with(result, move |mut body, _| {
                    // figure out what, if anything, needs to be in the closure for this function.
                    let mut free_variables = body.free_variables();
                    for (n, _) in arguments.iter() {
                        free_variables.remove(n);
                    }

                    // generate a new name for the closure type we're about to create
                    let closure_type_name = Name::located_gensym(
                        loc.clone(),
                        name.as_ref().map(Name::original_name).unwrap_or("closure_"),
                    );

                    // ... and then create a structure type that has all of the free variables
                    // in it
                    let closure_type = StructureDefinition::new(
                        loc.clone(),
                        closure_type_name.clone(),
                        free_variables.iter().map(|x| (x.clone(), None)).collect(),
                    );

                    // this will become the first argument of the function, so name it and add
                    // it to the argument list.
                    let closure_arg = Name::gensym("__closure_arg");
                    arguments.insert(
                        0,
                        (
                            closure_arg.clone(),
                            Some(Type::Named(closure_type_name.clone())),
                        ),
                    );

                    // Now make a map from the old free variable names to references into
                    // our closure argument
                    let rebinds = free_variables
                        .into_iter()
                        .map(|n| {
                            (
                                n.clone(),
                                Expression::FieldRef(
                                    n.location().clone(),
                                    Box::new(Expression::Reference(closure_arg.clone())),
                                    n,
                                ),
                            )
                        })
                        .collect::<Vec<(Name, Expression)>>();
                    let mut rebind_map = rebinds.iter().cloned().collect();

                    // and replace all the references in the function with this map
                    body.replace_references(&mut rebind_map);

                    // OK! This function definitely needs a name; if the user didn't give
                    // it one, we'll do so.
                    let function_name =
                        name.unwrap_or_else(|| Name::located_gensym(loc.clone(), "function"));

                    // And finally, we can make the function definition and insert it into our
                    // global list along with the new closure type.
                    let function = FunctionDefinition::new(
                        function_name.clone(),
                        arguments.clone(),
                        return_type.clone(),
                        body,
                    );
                    structure_map.insert(closure_type_name.clone(), closure_type);
                    function_map.insert(function_name.clone(), function);

                    // And the result of this function is a call to a primitive that generates
                    // the closure value in some sort of reasonable way.
                    Ok(Expression::Call(
                        Location::manufactured(),
                        Box::new(Expression::Primitive(
                            Location::manufactured(),
                            Name::new("<closure>", Location::manufactured()),
                        )),
                        vec![
                            Expression::Reference(function_name),
                            Expression::Constructor(
                                Location::manufactured(),
                                closure_type_name,
                                rebinds,
                            ),
                        ],
                    ))
                })
            }
        }
    }
}
/// Casting to a known primitive type validates cleanly; casting to an
/// unknown type is reported as an error.
#[test]
fn cast_checks_are_reasonable() {
    // Each case gets its own fresh maps. Both statements bind `x`, so sharing
    // one variable map would leave `x` bound when the second statement is
    // validated, and the "no warnings" assertion would then depend on how the
    // rebinding of `x` is treated rather than on the cast itself.
    {
        let mut variable_map = ScopedMap::new();
        let mut structure_map = HashMap::new();
        let mut function_map = HashMap::new();

        let good_stmt = Expression::parse(0, "x = <u16>4u8;").expect("valid test case");
        let result_good =
            good_stmt.validate(&mut variable_map, &mut structure_map, &mut function_map);

        assert!(result_good.is_ok());
        assert!(result_good.warnings().is_empty());
    }

    {
        let mut variable_map = ScopedMap::new();
        let mut structure_map = HashMap::new();
        let mut function_map = HashMap::new();

        let bad_stmt = Expression::parse(0, "x = <apple>4u8;").expect("valid test case");
        let result_err =
            bad_stmt.validate(&mut variable_map, &mut structure_map, &mut function_map);

        assert!(result_err.is_err());
        assert!(result_err.warnings().is_empty());
    }
}