From 212ca6cc5374d8a8aaca63da9c71fbdbb9be1880 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sun, 16 Jun 2024 20:59:32 -0700 Subject: [PATCH] CHECKPOINT: Initial syntax arbitrary implementation. --- Cargo.toml | 1 + src/bin/gen_program.rs | 23 + src/syntax.rs | 1 + src/syntax/arbitrary.rs | 984 ++++++++++++++++++++++++++++---------- src/syntax/ast.rs | 4 +- src/syntax/eval.rs | 2 + src/syntax/parser.lalrpop | 7 +- src/syntax/pretty.rs | 22 +- src/syntax/tokens.rs | 65 ++- src/syntax/validate.rs | 7 +- src/type_infer/convert.rs | 8 + src/util.rs | 1 + src/util/weighted_map.rs | 21 + 13 files changed, 867 insertions(+), 279 deletions(-) create mode 100644 src/bin/gen_program.rs create mode 100644 src/util/weighted_map.rs diff --git a/Cargo.toml b/Cargo.toml index 82314ea..2e5e0c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ thiserror = "1.0.57" anyhow = "1.0.80" tracing = "0.1.40" tracing-subscriber = { version = "0.3.18", features = ["time", "json", "env-filter"] } +names = "0.14.0" [build-dependencies] lalrpop = "0.20.2" diff --git a/src/bin/gen_program.rs b/src/bin/gen_program.rs new file mode 100644 index 0000000..58d656c --- /dev/null +++ b/src/bin/gen_program.rs @@ -0,0 +1,23 @@ +use ngr::syntax::ProgramGenerator; +use ngr::util::pretty::Allocator; +use proptest::strategy::{Strategy, ValueTree}; +use proptest::test_runner::{Config, TestRunner}; + +fn main() -> Result<(), anyhow::Error> { + let generator = ProgramGenerator::default(); + let runner_config = Config::default(); + let mut runner = TestRunner::new(runner_config); + let program_tree = generator + .new_tree(&mut runner) + .map_err(|e| anyhow::anyhow!("Couldn't generate test program: {}", e))?; + let program = program_tree.current(); + let allocator = Allocator::new(); + let mut stdout = std::io::stdout(); + + for top_level in program.into_iter() { + let docbuilder = top_level.pretty(&allocator); + docbuilder.render(78, &mut stdout)?; + } + + Ok(()) +} diff --git a/src/syntax.rs b/src/syntax.rs index 15ee23a..dbbc97b 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -43,6 +43,7 @@ lalrpop_mod!( pub mod pretty; mod validate; +pub use crate::syntax::arbitrary::ProgramGenerator; pub use crate::syntax::ast::*; pub use crate::syntax::location::Location; pub use crate::syntax::name::Name; diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs index 1d489a7..6c42c30 100644 --- a/src/syntax/arbitrary.rs +++ b/src/syntax/arbitrary.rs @@ -1,18 +1,136 @@ -use crate::syntax::ast::{ConstantType, Expression, Program, TopLevel, Value}; +use crate::syntax::ast::{ConstantType, Expression, TopLevel, Type, Value}; use crate::syntax::location::Location; use crate::syntax::name::Name; -use proptest::sample::select; +use crate::util::scoped_map::ScopedMap; +use crate::util::weighted_map::WeightedMap; +use proptest::prelude::Strategy; use proptest::strategy::{NewTree, ValueTree}; use proptest::test_runner::{TestRng, TestRunner}; -use proptest::{ - prelude::{Arbitrary, BoxedStrategy, Strategy}, - strategy::{Just, Union}, -}; -use std::collections::HashMap; -use std::ops::Range; +use rand::distributions::{Distribution, WeightedIndex}; +use rand::Rng; +use std::collections::{HashMap, VecDeque}; +use std::str::FromStr; +use std::sync::atomic::AtomicU64; pub const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*"; +lazy_static::lazy_static! { + static ref BASE_PROGRAM_LENGTH: WeightedIndex = WeightedIndex::new([ + 0, // weight for 0 + 1, // weight for 1 + 1, // weight for 2 + 1, // weight for 3 + 3, // triple weight for 4 + 3, // triple weight for 5 + 3, // triple weight for 6 + 3, // triple weight for 7 + 3, // triple weight for 8 + 3, // triple weight for 9 + 3, // triple weight for 10 + 3, // double weight for 11 + 3, // double weight for 12 + 3, // double weight for 13 + 1, // weight for 14 + 1, // weight for 15 + ]).unwrap(); + + static ref KEEP_FIELD_TYPE_ANNOTATION: WeightedMap = WeightedMap::new(&[ + (3, true), + (1, false), + ]); + + static ref EXPRESSION_TYPE_FREQUENCIES: WeightedMap = WeightedMap::new(&[ + (1, ExpressionType::Value), + (1, ExpressionType::Constructor), + (1, ExpressionType::Reference), + (1, ExpressionType::FieldRef), + (1, ExpressionType::Cast), + (1, ExpressionType::Call), + (1, ExpressionType::Block), + (1, ExpressionType::Binding), + ]); + + static ref TYPE_FREQUENCIES: WeightedMap<&'static str> = WeightedMap::new(&[ + (1, "void"), + (5, "i8"), + (10, "i16"), + (5, "i32"), + (10, "i64"), + (5, "u8"), + (10, "u16"), + (5, "u32"), + (10, "u64"), + (1, ""), + (1, ""), + ]); + + static ref CALL_SHOULD_BE_PRIMITIVE: WeightedMap = WeightedMap::new(&[ + (3, true), + (1, false), + ]); + + static ref CALL_ARGUMENT_COUNT: WeightedIndex = WeightedIndex::new([ + 3, + 20, + 20, + 10, + 5, + 2, + 1, + 1, + 1, + ]).unwrap(); + + static ref BLOCK_LENGTH_MINUS_ONE: WeightedIndex = WeightedIndex::new([ + 5, + 10, + 20, + 20, + 10, + 10, + 5, + 1, + 1, + 1, + ]).unwrap(); + + static ref INNER_BLOCK_TYPE_SHOULD_BE_VOID: WeightedMap = WeightedMap::new(&[ + (1, true), + (1, false), + ]); + + static ref STRUCTURE_FIELD_COUNT: WeightedIndex = WeightedIndex::new([ + 0, // let's not mess around with empty structures here + 10, + 10, + 10, + 10, + 10, + 5, + 5, + 3, + 3, + 3, + 3, + 3, + 1, + 1, + 1, + ]).unwrap(); +} + +#[derive(Clone, Debug)] +enum ExpressionType { + Value, + Constructor, + Reference, + FieldRef, + Cast, + Call, + Block, + Binding, +} + #[derive(Debug, Default)] pub struct ProgramGenerator {} @@ -21,29 +139,617 @@ impl Strategy for ProgramGenerator { type Value = Vec; fn new_tree(&self, runner: &mut TestRunner) -> NewTree { - unimplemented!() + NewTree::::Ok(ProgramTree::new(runner)) } } pub struct ProgramTree { _rng: TestRng, - current: Vec, + current: VecDeque, } +pub enum Requirement { + Function(Name, Vec, Type), + Structure(Name, HashMap), + Variable(Name, Type), +} + +type EstablishedStructMap = HashMap>; + impl ProgramTree { - fn new(mut rng: TestRng) -> Self { + fn new(runner: &mut TestRunner) -> Self { + let mut rng = runner.new_rng(); + let base_program_length = BASE_PROGRAM_LENGTH.sample(&mut rng); + let mut env = ScopedMap::new(); + let mut current = VecDeque::new(); + let mut established_structs = HashMap::new(); + + while current.len() < base_program_length { + let (expression, mut requirements) = + generate_expr(&mut rng, &mut established_structs, &mut env, None); + + current.push_front(TopLevel::Expression(expression)); + while let Some(requirement) = requirements.pop() { + match requirement { + Requirement::Function(name, args, result) => { + let (expression, newreqs) = generate_function( + &mut rng, + &mut established_structs, + &mut env, + name, + args, + result, + ); + current.push_front(TopLevel::Expression(expression)); + requirements.extend(newreqs.into_iter()); + } + + Requirement::Structure(name, fields) => { + let fields = fields + .into_iter() + .map(|(name, ty)| { + if KEEP_FIELD_TYPE_ANNOTATION.sample(&mut rng) { + (name, Some(ty)) + } else { + (name, None) + } + }) + .collect(); + current.push_front(TopLevel::Structure( + Location::manufactured(), + name, + fields, + )) + } + + Requirement::Variable(name, ty) => { + let (newexpr, newreqs) = + generate_expr(&mut rng, &mut established_structs, &mut env, Some(ty)); + let binding = + Expression::Binding(Location::manufactured(), name, Box::new(newexpr)); + current.push_front(TopLevel::Expression(binding)); + requirements.extend(newreqs.into_iter()); + } + } + } + } + ProgramTree { _rng: rng, - current: vec![], + current: current.into_iter().collect(), } } } +fn generate_expr( + rng: &mut TestRng, + established_struct_types: &mut EstablishedStructMap, + env: &mut ScopedMap, + optional_target_type: Option, +) -> (Expression, Vec) { + let target_type = + optional_target_type.unwrap_or_else(|| generate_type(rng, established_struct_types)); + let expression_type = EXPRESSION_TYPE_FREQUENCIES.sample(rng); + + match expression_type { + ExpressionType::Value => match target_type { + Type::Named(ref x) => match ConstantType::from_str(x.current_name()) { + Ok(ct) => (generate_constant(rng, ct), vec![]), + _ => generate_constructor(rng, established_struct_types, env, target_type), + }, + + Type::Struct(_) => { + generate_constructor(rng, established_struct_types, env, target_type) + } + }, + + ExpressionType::Constructor => { + generate_constructor(rng, established_struct_types, env, target_type) + } + + ExpressionType::Reference => { + let mut requirements = vec![]; + let mut available_variables = find_variables_with_type(env, &target_type); + + if available_variables.is_empty() { + let name = generate_name(rng); + requirements.push(Requirement::Variable(name.clone(), target_type.clone())); + available_variables.push(name); + } + + let idx = rng.gen_range(0..available_variables.len()); + let ref_name = available_variables.get(idx).expect("index in range"); + let expr = Expression::Reference(ref_name.clone()); + + (expr, requirements) + } + + ExpressionType::FieldRef => { + let mut requirements = vec![]; + + let mut valid_types = + find_structs_with_field_type(established_struct_types, &target_type); + if valid_types.is_empty() { + let name = generate_name(rng); + let mut fields = generate_structure_fields(rng, established_struct_types); + valid_types = fields + .iter() + .filter(|(_, t)| *t == &target_type) + .map(|(a, b)| (b.clone(), a.clone())) + .collect::>(); + + if valid_types.is_empty() { + let field_name = generate_name(rng); + fields.insert(field_name.clone(), target_type.clone()); + valid_types = vec![(Type::Named(name.clone()), field_name)]; + } + + requirements.push(Requirement::Structure(name.clone(), fields)); + } + + let new_target_type_idx = rng.gen_range(0..valid_types.len()); + let (new_target_type, name) = valid_types + .get(new_target_type_idx) + .expect("generated reasonable index") + .clone(); + let (subexp, newreqs) = + generate_expr(rng, established_struct_types, env, Some(new_target_type)); + requirements.extend(newreqs); + + ( + Expression::FieldRef(Location::manufactured(), Box::new(subexp), name), + requirements, + ) + } + + ExpressionType::Cast => match target_type { + Type::Named(ref name) => match ConstantType::from_str(name.current_name()) { + Ok(ct) => { + let new_targets = ct.safe_casts_to(); + let idx = rng.gen_range(0..new_targets.len()); + let new_constant_type = new_targets.get(idx).expect("generates in bounds"); + let new_type = Type::Named(Name::manufactured(new_constant_type.name())); + let (subexpr, reqs) = + generate_expr(rng, established_struct_types, env, Some(new_type)); + ( + Expression::Cast(Location::manufactured(), ct.name(), Box::new(subexpr)), + reqs, + ) + } + + Err(_) => { + let (subexpr, reqs) = generate_expr( + rng, + established_struct_types, + env, + Some(target_type.clone()), + ); + ( + Expression::Cast( + Location::manufactured(), + name.current_name().to_string(), + Box::new(subexpr), + ), + reqs, + ) + } + }, + + Type::Struct(fields) => match find_struct_for_fields(&fields, established_struct_types) + { + None => { + let name = generate_name(rng); + let mut new_fields = HashMap::new(); + + for (field_name, maybe_type) in fields.into_iter() { + let field_type = maybe_type + .unwrap_or_else(|| generate_type(rng, established_struct_types)); + new_fields.insert(field_name, field_type); + } + + established_struct_types.insert(name.clone(), new_fields.clone()); + let (subexpr, mut reqs) = generate_expr( + rng, + established_struct_types, + env, + Some(Type::Named(name.clone())), + ); + let result = Expression::Cast( + Location::manufactured(), + name.current_name().to_string(), + Box::new(subexpr), + ); + reqs.push(Requirement::Structure(name, new_fields)); + (result, reqs) + } + + Some((name, _)) => { + let (subexpr, reqs) = generate_expr( + rng, + established_struct_types, + env, + Some(Type::Named(name.clone())), + ); + let result = Expression::Cast( + Location::manufactured(), + name.current_name().to_string(), + Box::new(subexpr), + ); + (result, reqs) + } + }, + }, + + ExpressionType::Call => match target_type { + Type::Named(ref x) => match ConstantType::from_str(x.current_name()) { + Err(_) => { + genererate_call_to_function(rng, established_struct_types, env, target_type) + } + Ok(prim) if CALL_SHOULD_BE_PRIMITIVE.sample(rng) => { + let options = prim.primitives_for(); + let idx = rng.gen_range(0..options.len()); + let (primitive, argtypes) = options.get(idx).expect("index in range"); + let mut prereqs = vec![]; + let mut exprs = vec![]; + let func = Expression::Primitive( + Location::manufactured(), + Name::new(primitive, Location::manufactured()), + ); + + for possible_type in argtypes.iter() { + let (expr, new_prereqs) = generate_expr( + rng, + established_struct_types, + env, + possible_type + .map(|x| Type::Named(Name::new(x, Location::manufactured()))), + ); + exprs.push(expr); + prereqs.extend(new_prereqs.into_iter()); + } + + let retval = Expression::Call(Location::manufactured(), Box::new(func), exprs); + + (retval, prereqs) + } + Ok(_) => { + genererate_call_to_function(rng, established_struct_types, env, target_type) + } + }, + + Type::Struct(_) => { + genererate_call_to_function(rng, established_struct_types, env, target_type) + } + }, + + ExpressionType::Block => { + let target_block_size_minus_one = BLOCK_LENGTH_MINUS_ONE.sample(rng); + let mut block = VecDeque::new(); + let mut prereqs = vec![]; + + while block.len() < target_block_size_minus_one { + let inner_type = if INNER_BLOCK_TYPE_SHOULD_BE_VOID.sample(rng) { + Some(Type::Named(Name::new("void", Location::manufactured()))) + } else { + Some(generate_type(rng, established_struct_types)) + }; + let (expr, new_prereqs) = + generate_expr(rng, established_struct_types, env, inner_type); + block.push_back(expr); + + let mut new_work_queue: VecDeque = new_prereqs.into_iter().collect(); + + while let Some(next) = new_work_queue.pop_front() { + if let Requirement::Variable(name, varty) = next { + let (value, even_newer_reqs) = + generate_expr(rng, established_struct_types, env, Some(varty)); + + block.push_front(Expression::Binding( + Location::manufactured(), + name, + Box::new(value), + )); + for req in even_newer_reqs.into_iter() { + new_work_queue.push_front(req); + } + } else { + prereqs.push(next); + } + } + } + + let retval = Expression::Block(Location::manufactured(), block.into_iter().collect()); + (retval, prereqs) + } + + ExpressionType::Binding => { + let name = generate_name(rng); + let (expr, prereqs) = generate_expr( + rng, + established_struct_types, + env, + Some(target_type.clone()), + ); + env.insert(name.clone(), target_type); + ( + Expression::Binding(Location::manufactured(), name, Box::new(expr)), + prereqs, + ) + } + } +} + +fn generate_constant(rng: &mut TestRng, ct: ConstantType) -> Expression { + let build = + |val: u64| Expression::Value(Location::manufactured(), Value::Number(None, Some(ct), val)); + + match ct { + ConstantType::I8 => build(rng.gen::() as u64), + ConstantType::I16 => build(rng.gen::() as u64), + ConstantType::I32 => build(rng.gen::() as u64), + ConstantType::I64 => build(rng.gen::() as u64), + ConstantType::U8 => build(rng.gen::() as u64), + ConstantType::U16 => build(rng.gen::() as u64), + ConstantType::U32 => build(rng.gen::() as u64), + ConstantType::U64 => build(rng.gen::()), + ConstantType::Void => Expression::Value(Location::manufactured(), Value::Void), + } +} + +fn generate_constructor( + rng: &mut TestRng, + established_struct_types: &mut EstablishedStructMap, + env: &mut ScopedMap, + target_type: Type, +) -> (Expression, Vec) { + match target_type { + Type::Named(x) => match ConstantType::from_str(x.current_name()) { + Ok(ct) => (generate_constant(rng, ct), vec![]), + Err(_) => { + let mut field_assignments = vec![]; + let mut requirements = vec![]; + + let fields = match established_struct_types.get(&x) { + Some(fields) => fields.clone(), + None => { + let fields = generate_structure_fields(rng, established_struct_types); + requirements.push(Requirement::Structure(x.clone(), fields.clone())); + established_struct_types.insert(x.clone(), fields.clone()); + fields + } + }; + + for (field_name, field_type) in fields.into_iter() { + let (subexpr, reqs) = + generate_expr(rng, established_struct_types, env, Some(field_type)); + requirements.extend(reqs.into_iter()); + field_assignments.push((field_name, subexpr)); + } + + let result = + Expression::Constructor(Location::manufactured(), x, field_assignments); + + (result, requirements) + } + }, + + Type::Struct(fields) => { + let mut requirements = vec![]; + + let (name, fields) = match find_struct_for_fields(&fields, established_struct_types) { + Some((name, fields)) => (name, fields), + None => { + let new_name = generate_name(rng); + let mut result_fields = HashMap::new(); + + for (field, opttype) in fields { + let field_type = opttype + .clone() + .unwrap_or_else(|| generate_type(rng, established_struct_types)); + + result_fields.insert(field.clone(), field_type); + } + + requirements.push(Requirement::Structure( + new_name.clone(), + result_fields.clone(), + )); + (new_name, result_fields) + } + }; + + let mut field_assignments = vec![]; + for (field_name, field_type) in fields.into_iter() { + let (subexpr, reqs) = + generate_expr(rng, established_struct_types, env, Some(field_type)); + requirements.extend(reqs.into_iter()); + field_assignments.push((field_name, subexpr)); + } + + let result = Expression::Constructor(Location::manufactured(), name, field_assignments); + (result, requirements) + } + } +} + +fn generate_function( + rng: &mut TestRng, + established_struct_types: &mut EstablishedStructMap, + env: &mut ScopedMap, + name: Name, + arg_types: Vec, + ret: Type, +) -> (Expression, Vec) { + let mut args = vec![]; + + env.new_scope(); + for arg_type in arg_types.into_iter() { + let arg_name = generate_name(rng); + args.push((arg_name.clone(), Some(arg_type.clone()))); + env.insert(arg_name, arg_type); + } + let (body, prereqs) = generate_expr(rng, established_struct_types, env, Some(ret.clone())); + env.release_scope(); + + let function = Expression::Function( + Location::manufactured(), + Some(name), + args, + Some(ret), + Box::new(body), + ); + + (function, prereqs) +} + +fn generate_structure_fields( + rng: &mut TestRng, + established_struct_types: &mut EstablishedStructMap, +) -> HashMap { + let mut fields = HashMap::new(); + + for _ in 0..STRUCTURE_FIELD_COUNT.sample(rng) { + let name = generate_name(rng); + let ty = generate_type(rng, established_struct_types); + fields.insert(name, ty); + } + + fields +} + +fn find_struct_for_fields( + found_fields: &[(Name, Option)], + established_struct_types: &EstablishedStructMap, +) -> Option<(Name, HashMap)> { + 'top_search: for (name, established_fields) in established_struct_types.iter() { + if found_fields.len() == established_fields.len() { + for (found_name, found_ty) in found_fields.iter() { + match established_fields.get(found_name) { + None => continue 'top_search, + Some(established_ty) => { + if let Some(found_ty) = found_ty { + if found_ty != established_ty { + continue 'top_search; + } + } + } + } + } + return Some((name.clone(), established_fields.clone())); + } + } + + None +} + +fn generate_name(rng: &mut TestRng) -> Name { + static COUNTER: AtomicU64 = AtomicU64::new(0); + + let idx = rng.gen_range(0..names::NOUNS.len()); + let name = format!( + "{}{}", + names::NOUNS[idx], + COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst) + ); + + Name::new(name, Location::manufactured()) +} + +fn generate_type(rng: &mut TestRng, established_struct_types: &mut EstablishedStructMap) -> Type { + let possible_result = TYPE_FREQUENCIES.sample(rng); + + match possible_result { + "" if !established_struct_types.is_empty() => { + let keys: Vec<&Name> = established_struct_types.keys().collect(); + let idx = rng.gen_range(0..keys.len()); + let key = *keys.get(idx).expect("index in range"); + Type::Named(Name::new(key.clone(), Location::manufactured())) + } + + "" | "" => { + let fields_map = generate_structure_fields(rng, established_struct_types); + let mut fields = vec![]; + + for (name, ty) in fields_map.into_iter() { + if KEEP_FIELD_TYPE_ANNOTATION.sample(rng) { + fields.push((name, Some(ty))); + } else { + fields.push((name, None)); + } + } + + Type::Struct(fields) + } + + _ => Type::Named(Name::new(possible_result, Location::manufactured())), + } +} + +fn find_structs_with_field_type( + established_struct_types: &mut EstablishedStructMap, + target_type: &Type, +) -> Vec<(Type, Name)> { + let mut results = vec![]; + + for (est_name, est_fields) in established_struct_types.iter() { + for (field_name, field_type) in est_fields.iter() { + if target_type == field_type { + results.push((Type::Named(est_name.clone()), field_name.clone())); + } + } + } + + results +} + +fn find_variables_with_type(env: &ScopedMap, target_type: &Type) -> Vec { + let mut results = vec![]; + + for (name, ty) in env.bindings().into_iter() { + if target_type == &ty { + results.push(name); + } + } + + results +} + +fn genererate_call_to_function( + rng: &mut TestRng, + established_struct_types: &mut EstablishedStructMap, + env: &mut ScopedMap, + target_type: Type, +) -> (Expression, Vec) { + let name = generate_name(rng); + let arg_count = CALL_ARGUMENT_COUNT.sample(rng); + let mut prereqs = vec![]; + let mut arg_types = vec![]; + let mut arg_exprs = vec![]; + + for _ in 0..arg_count { + let arg_type = generate_type(rng, established_struct_types); + let (arg_expr, new_reqs) = + generate_expr(rng, established_struct_types, env, Some(arg_type.clone())); + arg_types.push(arg_type); + arg_exprs.push(arg_expr); + prereqs.extend(new_reqs.into_iter()); + } + + let call = Expression::Call( + Location::manufactured(), + Box::new(Expression::Reference(name.clone())), + arg_exprs, + ); + prereqs.push(Requirement::Function(name, arg_types, target_type)); + + (call, prereqs) +} + impl ValueTree for ProgramTree { type Value = Vec; fn current(&self) -> Self::Value { - self.current.clone() + self.current.iter().cloned().collect() } fn simplify(&mut self) -> bool { @@ -54,255 +760,3 @@ impl ValueTree for ProgramTree { false } } - -//impl ConstantType { -// fn get_operators(&self) -> &'static [(&'static str, usize)] { -// match self { -// ConstantType::Void => &[], -// ConstantType::I8 | ConstantType::I16 | ConstantType::I32 | ConstantType::I64 => { -// &[("+", 2), ("negate", 1), ("-", 2), ("*", 2), ("/", 2)] -// } -// ConstantType::U8 | ConstantType::U16 | ConstantType::U32 | ConstantType::U64 => { -// &[("+", 2), ("-", 2), ("*", 2), ("/", 2)] -// } -// } -// } -//} -// -//#[derive(Clone)] -//pub struct GenerationEnvironment { -// allow_inference: bool, -// block_length: Range, -// bindings: HashMap, -// return_type: ConstantType, -//} -// -//impl Default for GenerationEnvironment { -// fn default() -> Self { -// GenerationEnvironment { -// allow_inference: true, -// block_length: 2..10, -// bindings: HashMap::new(), -// return_type: ConstantType::U64, -// } -// } -//} -// -//impl GenerationEnvironment { -// pub fn new(allow_inference: bool) -> Self { -// GenerationEnvironment { -// allow_inference, -// ..Default::default() -// } -// } -//} -// -//impl Arbitrary for Program { -// type Parameters = GenerationEnvironment; -// type Strategy = BoxedStrategy; -// -// fn arbitrary_with(genenv: Self::Parameters) -> Self::Strategy { -// proptest::collection::vec( -// ProgramTopLevelInfo::arbitrary(), -// genenv.block_length.clone(), -// ) -// .prop_flat_map(move |mut ptlis| { -// let mut items = Vec::new(); -// let mut genenv = genenv.clone(); -// -// for psi in ptlis.drain(..) { -// if genenv.bindings.is_empty() || psi.should_be_binding { -// genenv.return_type = psi.binding_type; -// let expr = Expression::arbitrary_with(genenv.clone()); -// genenv.bindings.insert(psi.name.clone(), psi.binding_type); -// items.push( -// expr.prop_map(move |expr| { -// TopLevel::Expression(Expression::Binding( -// Location::manufactured(), -// psi.name.clone(), -// Box::new(expr), -// )) -// }) -// .boxed(), -// ); -// } else { -// let printers = genenv.bindings.keys().map(|n| { -// Just(TopLevel::Expression(Expression::Call( -// Location::manufactured(), -// Box::new(Expression::Primitive( -// Location::manufactured(), -// Name::manufactured("print"), -// )), -// vec![Expression::Reference(n.clone())], -// ))) -// }); -// items.push(Union::new(printers).boxed()); -// } -// } -// -// items -// .prop_map(|items| Program { -// functions: HashMap::new(), -// structures: HashMap::new(), -// body: unimplemented!(), -// }) -// .boxed() -// }) -// .boxed() -// } -//} -// -//impl Arbitrary for Name { -// type Parameters = (); -// type Strategy = BoxedStrategy; -// -// fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { -// VALID_VARIABLE_NAMES.prop_map(Name::manufactured).boxed() -// } -//} -// -//#[derive(Debug)] -//struct ProgramTopLevelInfo { -// should_be_binding: bool, -// name: Name, -// binding_type: ConstantType, -//} -// -//impl Arbitrary for ProgramTopLevelInfo { -// type Parameters = (); -// type Strategy = BoxedStrategy; -// -// fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy { -// ( -// Union::new(vec![Just(true), Just(true), Just(false)]), -// Name::arbitrary(), -// ConstantType::arbitrary(), -// ) -// .prop_map( -// |(should_be_binding, name, binding_type)| ProgramTopLevelInfo { -// should_be_binding, -// name, -// binding_type, -// }, -// ) -// .boxed() -// } -//} -// -//impl Arbitrary for Expression { -// type Parameters = GenerationEnvironment; -// type Strategy = BoxedStrategy; -// -// fn arbitrary_with(genenv: Self::Parameters) -> Self::Strategy { -// // Value(Location, Value). These are the easiest variations to create, because we can always -// // create one. -// let value_strategy = Value::arbitrary_with(genenv.clone()) -// .prop_map(|x| Expression::Value(Location::manufactured(), x)) -// .boxed(); -// -// // Reference(Location, String), These are slightly trickier, because we can end up in a situation -// // where either no variables are defined, or where none of the defined variables have a type we -// // can work with. So what we're going to do is combine this one with the previous one as a "leaf -// // strategy" -- our non-recursive items -- if we can, or just set that to be the value strategy -// // if we can't actually create an references. -// let mut bound_variables_of_type = genenv -// .bindings -// .iter() -// .filter(|(_, v)| genenv.return_type == **v) -// .map(|(n, _)| n) -// .collect::>(); -// let leaf_strategy = if bound_variables_of_type.is_empty() { -// value_strategy -// } else { -// let mut strats = bound_variables_of_type -// .drain(..) -// .map(|x| Just(Expression::Reference(x.clone())).boxed()) -// .collect::>(); -// strats.push(value_strategy); -// Union::new(strats).boxed() -// }; -// -// // now we generate our recursive types, given our leaf strategy -// leaf_strategy -// .prop_recursive(3, 10, 2, move |strat| { -// ( -// select(genenv.return_type.get_operators()), -// strat.clone(), -// strat, -// ) -// .prop_map(|((oper, count), left, right)| { -// let mut args = vec![left, right]; -// while args.len() > count { -// args.pop(); -// } -// Expression::Call( -// Location::manufactured(), -// Box::new(Expression::Primitive( -// Location::manufactured(), -// Name::manufactured(oper), -// )), -// args, -// ) -// }) -// }) -// .boxed() -// } -//} -// -//impl Arbitrary for Value { -// type Parameters = GenerationEnvironment; -// type Strategy = BoxedStrategy; -// -// fn arbitrary_with(genenv: Self::Parameters) -> Self::Strategy { -// let printed_base_strategy = Union::new([ -// Just(None::), -// Just(Some(2)), -// Just(Some(8)), -// Just(Some(10)), -// Just(Some(16)), -// ]); -// let value_strategy = u64::arbitrary(); -// -// (printed_base_strategy, bool::arbitrary(), value_strategy) -// .prop_map(move |(base, declare_type, value)| { -// let converted_value = match genenv.return_type { -// ConstantType::Void => value, -// ConstantType::I8 => value % (i8::MAX as u64), -// ConstantType::U8 => value % (u8::MAX as u64), -// ConstantType::I16 => value % (i16::MAX as u64), -// ConstantType::U16 => value % (u16::MAX as u64), -// ConstantType::I32 => value % (i32::MAX as u64), -// ConstantType::U32 => value % (u32::MAX as u64), -// ConstantType::I64 => value % (i64::MAX as u64), -// ConstantType::U64 => value, -// }; -// let ty = if declare_type || !genenv.allow_inference { -// Some(genenv.return_type) -// } else { -// None -// }; -// Value::Number(base, ty, converted_value) -// }) -// .boxed() -// } -//} -// -//impl Arbitrary for ConstantType { -// type Parameters = (); -// type Strategy = BoxedStrategy; -// -// fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { -// Union::new([ -// Just(ConstantType::I8), -// Just(ConstantType::I16), -// Just(ConstantType::I32), -// Just(ConstantType::I64), -// Just(ConstantType::U8), -// Just(ConstantType::U16), -// Just(ConstantType::U32), -// Just(ConstantType::U64), -// ]) -// .boxed() -// } -//} -// diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 7fb24e9..56ed690 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -76,7 +76,7 @@ impl StructureDefinition { #[derive(Clone, Debug, PartialEq)] pub enum TopLevel { Expression(Expression), - Structure(Location, Name, Vec<(Name, Type)>), + Structure(Location, Name, Vec<(Name, Option)>), } impl Located for TopLevel { @@ -205,6 +205,8 @@ pub enum Value { /// operation "-" on the number 4. We'll translate this into a type-specific /// number at a later time. Number(Option, Option, u64), + /// The empty value + Void, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/src/syntax/eval.rs b/src/syntax/eval.rs index e07a876..30c51a3 100644 --- a/src/syntax/eval.rs +++ b/src/syntax/eval.rs @@ -45,6 +45,8 @@ impl Expression { Some(ConstantType::U32) => Ok(Value::U32(*v as u32)), Some(ConstantType::U64) => Ok(Value::U64(*v)), }, + + super::Value::Void => Ok(Value::Void), }, Expression::Constructor(_, on, fields) => { diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 2a9516a..6455827 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -92,9 +92,12 @@ Structure: TopLevel = { } } -Field: (Name, Type) = { +Field: (Name, Option) = { "> ":" ";" => - (Name::new(name, Location::new(file_idx, s..e)), field_type) + (Name::new(name, Location::new(file_idx, s..e)), Some(field_type)), + "> ";" => + (Name::new(name, Location::new(file_idx, s..e)), None), + } Type: Type = { diff --git a/src/syntax/pretty.rs b/src/syntax/pretty.rs index e184aaf..36deeab 100644 --- a/src/syntax/pretty.rs +++ b/src/syntax/pretty.rs @@ -84,7 +84,10 @@ impl Program { impl TopLevel { pub fn pretty<'a>(&self, allocator: &'a Allocator<'a>) -> DocBuilder<'a, Allocator<'a>> { match self { - TopLevel::Expression(expr) => expr.pretty(allocator), + TopLevel::Expression(expr) => expr + .pretty(allocator) + .append(allocator.text(";")) + .append(allocator.hardline()), TopLevel::Structure(_, name, fields) => allocator .text("struct") .append(allocator.space()) @@ -95,17 +98,24 @@ impl TopLevel { .append( allocator .concat(fields.iter().map(|(name, ty)| { + let type_bit = if let Some(ty) = ty { + allocator + .text(":") + .append(allocator.space()) + .append(ty.pretty(allocator)) + } else { + allocator.nil() + }; allocator .text(name.to_string()) - .append(allocator.text(":")) - .append(allocator.space()) - .append(ty.pretty(allocator)) + .append(type_bit) .append(allocator.text(";")) .append(allocator.hardline()) })) .nest(2), ) - .append(allocator.text("}")), + .append(allocator.text("}")) + .append(allocator.hardline()), } } } @@ -231,6 +241,8 @@ impl Value { allocator.text(value_str) } + + Value::Void => allocator.text(""), } } } diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 9ad3e09..688ac42 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -1,6 +1,6 @@ use internment::ArcIntern; use logos::{Lexer, Logos}; -use std::fmt; +use std::{fmt, str::FromStr}; use thiserror::Error; /// A single token of the input stream; used to help the parsing go down @@ -205,6 +205,43 @@ impl From for cranelift_codegen::ir::Type { } } +pub struct StringNotConstantType(); + +impl FromStr for ConstantType { + type Err = StringNotConstantType; + + fn from_str(s: &str) -> Result { + match s { + "i8" => Ok(ConstantType::I8), + "i16" => Ok(ConstantType::I16), + "i32" => Ok(ConstantType::I32), + "i64" => Ok(ConstantType::I64), + "u8" => Ok(ConstantType::U8), + "u16" => Ok(ConstantType::U16), + "u32" => Ok(ConstantType::U32), + "u64" => Ok(ConstantType::U64), + "void" => Ok(ConstantType::Void), + _ => Err(StringNotConstantType()), + } + } +} + +impl fmt::Display for ConstantType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ConstantType::I8 => write!(f, "i8"), + ConstantType::I16 => write!(f, "i16"), + ConstantType::I32 => write!(f, "i32"), + ConstantType::I64 => write!(f, "i64"), + ConstantType::U8 => write!(f, "u8"), + ConstantType::U16 => write!(f, "u16"), + ConstantType::U32 => write!(f, "u32"), + ConstantType::U64 => write!(f, "u64"), + ConstantType::Void => write!(f, "void"), + } + } +} + impl ConstantType { /// Return the set of types that can be safely casted into this type. pub fn safe_casts_to(self) -> Vec { @@ -268,6 +305,32 @@ impl ConstantType { ConstantType::U64 => "u64".to_string(), } } + + /// Return the set of all primitives that can return this + /// type, along with the argument types for those primitives. + /// + /// A "None" value as an argument type means that the argument + /// type is unconstrained by the return type. + pub fn primitives_for(&self) -> Vec<(crate::ir::Primitive, Vec>)> { + use crate::ir::Primitive::*; + + match self { + ConstantType::Void => vec![(Print, vec![None])], + ConstantType::I8 | ConstantType::I16 | ConstantType::I32 | ConstantType::I64 => vec![ + (Plus, vec![Some(*self), Some(*self)]), + (Minus, vec![Some(*self), Some(*self)]), + (Times, vec![Some(*self), Some(*self)]), + (Divide, vec![Some(*self), Some(*self)]), + (Negate, vec![Some(*self)]), + ], + ConstantType::U8 | ConstantType::U16 | ConstantType::U32 | ConstantType::U64 => vec![ + (Plus, vec![Some(*self), Some(*self)]), + (Minus, vec![Some(*self), Some(*self)]), + (Times, vec![Some(*self), Some(*self)]), + (Divide, vec![Some(*self), Some(*self)]), + ], + } + } } #[derive(Debug, Error, PartialEq)] diff --git a/src/syntax/validate.rs b/src/syntax/validate.rs index 8655643..c754101 100644 --- a/src/syntax/validate.rs +++ b/src/syntax/validate.rs @@ -99,11 +99,8 @@ impl Program { } TopLevel::Structure(loc, name, fields) => { - let definition = StructureDefinition::new( - loc, - name.clone(), - fields.into_iter().map(|(n, t)| (n, Some(t))).collect(), - ); + let definition = + StructureDefinition::new(loc, name.clone(), fields.into_iter().collect()); structures.insert(name, definition); } diff --git a/src/type_infer/convert.rs b/src/type_infer/convert.rs index 1c0060d..57c3e15 100644 --- a/src/type_infer/convert.rs +++ b/src/type_infer/convert.rs @@ -124,6 +124,14 @@ impl InferenceEngine { // converting values is mostly tedious, because there's so many cases // involved syntax::Expression::Value(loc, val) => match val { + syntax::Value::Void => ( + ir::Expression::Atomic(ir::ValueOrRef::Value( + loc, + ir::TypeOrVar::Primitive(PrimitiveType::Void), + ir::Value::Void, + )), + ir::TypeOrVar::Primitive(PrimitiveType::Void), + ), syntax::Value::Number(base, mctype, value) => { let (newval, newtype) = match mctype { None => { diff --git a/src/util.rs b/src/util.rs index d872744..d2217e3 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,3 +1,4 @@ pub mod pretty; pub mod scoped_map; pub mod warning_result; +pub mod weighted_map; diff --git a/src/util/weighted_map.rs b/src/util/weighted_map.rs new file mode 100644 index 0000000..48a9aa8 --- /dev/null +++ b/src/util/weighted_map.rs @@ -0,0 +1,21 @@ +use rand::distributions::{Distribution, WeightedIndex}; + +pub struct WeightedMap { + index: WeightedIndex, + items: Vec, +} + +impl WeightedMap { + pub fn new(map: &[(usize, T)]) -> Self { + let index = WeightedIndex::new(map.iter().map(|x| x.0)).unwrap(); + let items = map.iter().map(|x| x.1.clone()).collect(); + WeightedMap { index, items } + } +} + +impl Distribution for WeightedMap { + fn sample(&self, rng: &mut R) -> T { + let idx = self.index.sample(rng); + self.items.get(idx).unwrap().clone() + } +}