🤷 The initial version of the compiler, both static and JIT.
This implements a full compiler, with both static compilation and JIT support, for the world's simplest and silliest programming language. You can do math, and print variables. That's it. On the bright side, it implements every part of the compiler, from the lexer and parser; through analysis and simplification; and into a reasonable code generator. This should be a good jumping off point for adding more advanced features. Tests, including proptests, are included to help avoid regressions.
This commit is contained in:
159
src/syntax/arbitrary.rs
Normal file
159
src/syntax/arbitrary.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use crate::syntax::ast::{Expression, Program, Statement, Value};
|
||||
use crate::syntax::location::Location;
|
||||
use proptest::sample::select;
|
||||
use proptest::{
|
||||
prelude::{Arbitrary, BoxedStrategy, Strategy},
|
||||
strategy::{Just, Union},
|
||||
};
|
||||
|
||||
const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*";
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Name(String);
|
||||
|
||||
impl Arbitrary for Name {
|
||||
type Parameters = ();
|
||||
type Strategy = BoxedStrategy<Self>;
|
||||
|
||||
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
|
||||
VALID_VARIABLE_NAMES.prop_map(Name).boxed()
|
||||
}
|
||||
}
|
||||
|
||||
impl Arbitrary for Program {
|
||||
type Parameters = ();
|
||||
type Strategy = BoxedStrategy<Self>;
|
||||
|
||||
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
|
||||
let optionals = Vec::<Option<Name>>::arbitrary();
|
||||
|
||||
optionals
|
||||
.prop_flat_map(|mut possible_names| {
|
||||
let mut statements = Vec::new();
|
||||
let mut defined_variables: HashSet<String> = HashSet::new();
|
||||
|
||||
for possible_name in possible_names.drain(..) {
|
||||
match possible_name {
|
||||
None if defined_variables.is_empty() => continue,
|
||||
None => statements.push(
|
||||
Union::new(defined_variables.iter().map(|name| {
|
||||
Just(Statement::Print(Location::manufactured(), name.to_string()))
|
||||
}))
|
||||
.boxed(),
|
||||
),
|
||||
Some(new_name) => {
|
||||
let closures_name = new_name.0.clone();
|
||||
let retval =
|
||||
Expression::arbitrary_with(Some(defined_variables.clone()))
|
||||
.prop_map(move |exp| {
|
||||
Statement::Binding(
|
||||
Location::manufactured(),
|
||||
closures_name.clone(),
|
||||
exp,
|
||||
)
|
||||
})
|
||||
.boxed();
|
||||
|
||||
defined_variables.insert(new_name.0);
|
||||
statements.push(retval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
statements
|
||||
})
|
||||
.prop_map(|statements| Program { statements })
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
impl Arbitrary for Statement {
|
||||
type Parameters = Option<HashSet<String>>;
|
||||
type Strategy = BoxedStrategy<Self>;
|
||||
|
||||
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
|
||||
let duplicated_args = args.clone();
|
||||
let defined_variables = args.unwrap_or_default();
|
||||
|
||||
let binding_strategy = (
|
||||
VALID_VARIABLE_NAMES,
|
||||
Expression::arbitrary_with(duplicated_args),
|
||||
)
|
||||
.prop_map(|(name, exp)| Statement::Binding(Location::manufactured(), name, exp))
|
||||
.boxed();
|
||||
|
||||
if defined_variables.is_empty() {
|
||||
binding_strategy
|
||||
} else {
|
||||
let print_strategy = Union::new(
|
||||
defined_variables
|
||||
.iter()
|
||||
.map(|x| Just(Statement::Print(Location::manufactured(), x.to_string()))),
|
||||
)
|
||||
.boxed();
|
||||
|
||||
Union::new([binding_strategy, print_strategy]).boxed()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Arbitrary for Expression {
|
||||
type Parameters = Option<HashSet<String>>;
|
||||
type Strategy = BoxedStrategy<Self>;
|
||||
|
||||
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
|
||||
let defined_variables = args.unwrap_or_default();
|
||||
|
||||
let value_strategy = Value::arbitrary()
|
||||
.prop_map(move |x| Expression::Value(Location::manufactured(), x))
|
||||
.boxed();
|
||||
|
||||
let leaf_strategy = if defined_variables.is_empty() {
|
||||
value_strategy
|
||||
} else {
|
||||
let reference_strategy = Union::new(defined_variables.iter().map(|x| {
|
||||
Just(Expression::Reference(
|
||||
Location::manufactured(),
|
||||
x.to_owned(),
|
||||
))
|
||||
}))
|
||||
.boxed();
|
||||
Union::new([value_strategy, reference_strategy]).boxed()
|
||||
};
|
||||
|
||||
leaf_strategy
|
||||
.prop_recursive(3, 64, 2, move |inner| {
|
||||
(
|
||||
select(super::BINARY_OPERATORS),
|
||||
proptest::collection::vec(inner, 2),
|
||||
)
|
||||
.prop_map(move |(operator, exprs)| {
|
||||
Expression::Primitive(Location::manufactured(), operator.to_string(), exprs)
|
||||
})
|
||||
})
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
||||
impl Arbitrary for Value {
|
||||
type Parameters = ();
|
||||
type Strategy = BoxedStrategy<Self>;
|
||||
|
||||
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
|
||||
let base_strategy = Union::new([
|
||||
Just(None::<u8>),
|
||||
Just(Some(2)),
|
||||
Just(Some(8)),
|
||||
Just(Some(10)),
|
||||
Just(Some(16)),
|
||||
]);
|
||||
|
||||
let value_strategy = i64::arbitrary();
|
||||
|
||||
(base_strategy, value_strategy)
|
||||
.prop_map(move |(base, value)| Value::Number(base, value))
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
60
src/syntax/ast.rs
Normal file
60
src/syntax/ast.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
use crate::syntax::Location;
|
||||
|
||||
pub static BINARY_OPERATORS: &[&str] = &["+", "-", "*", "/"];
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Program {
|
||||
pub statements: Vec<Statement>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Statement {
|
||||
Binding(Location, String, Expression),
|
||||
Print(Location, String),
|
||||
}
|
||||
|
||||
impl PartialEq for Statement {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
match self {
|
||||
Statement::Binding(_, name1, expr1) => match other {
|
||||
Statement::Binding(_, name2, expr2) => name1 == name2 && expr1 == expr2,
|
||||
_ => false,
|
||||
},
|
||||
Statement::Print(_, name1) => match other {
|
||||
Statement::Print(_, name2) => name1 == name2,
|
||||
_ => false,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Expression {
|
||||
Value(Location, Value),
|
||||
Reference(Location, String),
|
||||
Primitive(Location, String, Vec<Expression>),
|
||||
}
|
||||
|
||||
impl PartialEq for Expression {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
match self {
|
||||
Expression::Value(_, val1) => match other {
|
||||
Expression::Value(_, val2) => val1 == val2,
|
||||
_ => false,
|
||||
},
|
||||
Expression::Reference(_, var1) => match other {
|
||||
Expression::Reference(_, var2) => var1 == var2,
|
||||
_ => false,
|
||||
},
|
||||
Expression::Primitive(_, prim1, args1) => match other {
|
||||
Expression::Primitive(_, prim2, args2) => prim1 == prim2 && args1 == args2,
|
||||
_ => false,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A literal value.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Value {
    /// An integer, together with the base it was written in when the source
    /// gave one explicitly (`None` for an unprefixed number).
    Number(Option<u8>, i64),
}
|
||||
56
src/syntax/location.rs
Normal file
56
src/syntax/location.rs
Normal file
@@ -0,0 +1,56 @@
|
||||
use codespan_reporting::diagnostic::{Diagnostic, Label};
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct Location {
|
||||
file_idx: usize,
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
impl Location {
|
||||
pub fn new(file_idx: usize, offset: usize) -> Self {
|
||||
Location { file_idx, offset }
|
||||
}
|
||||
|
||||
pub fn manufactured() -> Self {
|
||||
Location {
|
||||
file_idx: 0,
|
||||
offset: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn primary_label(&self) -> Label<usize> {
|
||||
Label::primary(self.file_idx, self.offset..self.offset)
|
||||
}
|
||||
|
||||
pub fn secondary_label(&self) -> Label<usize> {
|
||||
Label::secondary(self.file_idx, self.offset..self.offset)
|
||||
}
|
||||
|
||||
pub fn range_label(&self, end: &Location) -> Vec<Label<usize>> {
|
||||
if self.file_idx == end.file_idx {
|
||||
vec![Label::primary(self.file_idx, self.offset..end.offset)]
|
||||
} else if self.file_idx == 0 {
|
||||
// if this is a manufactured item, then ... just try the other one
|
||||
vec![Label::primary(end.file_idx, end.offset..end.offset)]
|
||||
} else {
|
||||
// we'll just pick the first location if this is in two different
|
||||
// files
|
||||
vec![Label::primary(self.file_idx, self.offset..self.offset)]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn error(&self) -> Diagnostic<usize> {
|
||||
Diagnostic::error().with_labels(vec![Label::primary(
|
||||
self.file_idx,
|
||||
self.offset..self.offset,
|
||||
)])
|
||||
}
|
||||
|
||||
pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> {
|
||||
Diagnostic::error().with_labels(vec![Label::primary(
|
||||
self.file_idx,
|
||||
self.offset..self.offset,
|
||||
)
|
||||
.with_message(msg)])
|
||||
}
|
||||
}
|
||||
78
src/syntax/parser.lalrpop
Normal file
78
src/syntax/parser.lalrpop
Normal file
@@ -0,0 +1,78 @@
|
||||
use crate::syntax::{LexerError, Location};
|
||||
use crate::syntax::ast::{Program,Statement,Expression,Value};
|
||||
use crate::syntax::tokens::Token;
|
||||
use internment::ArcIntern;
|
||||
|
||||
grammar(file_idx: usize);
|
||||
|
||||
// Connects the grammar to the external lexer: locations are plain offsets,
// lexing failures are `LexerError`s, and each terminal below names the
// `Token` variant it stands for.
extern {
    type Location = usize;
    type Error = LexerError;

    enum Token {
        // punctuation
        "=" => Token::Equals,
        ";" => Token::Semi,
        "(" => Token::LeftParen,
        ")" => Token::RightParen,

        // keywords
        "print" => Token::Print,

        // arithmetic operators
        "+" => Token::Operator('+'),
        "-" => Token::Operator('-'),
        "*" => Token::Operator('*'),
        "/" => Token::Operator('/'),

        // terminals that carry data
        "<num>" => Token::Number((<Option<u8>>,<i64>)),
        "<var>" => Token::Variable(<ArcIntern<String>>),
    }
}
|
||||
|
||||
// A program is a (possibly empty) sequence of statements.
pub Program: Program = {
    <statements:Statements> => Program { statements }
}

// Left-recursive list: zero or more statements, collected in source order.
Statements: Vec<Statement> = {
    <mut earlier:Statements> <next:Statement> => {
        earlier.push(next);
        earlier
    },
    => Vec::new()
}
|
||||
|
||||
// A statement is `<var> = <expression>;` or `print <var>;`. In both forms
// the recorded location is the position of the variable token.
pub Statement: Statement = {
    <start:@L> <name:"<var>"> "=" <value:Expression> ";" =>
        Statement::Binding(Location::new(file_idx, start), name.to_string(), value),
    "print" <start:@L> <name:"<var>"> ";" =>
        Statement::Print(Location::new(file_idx, start), name.to_string()),
}
|
||||
|
||||
// Expressions are layered by precedence: additive operators bind loosest,
// multiplicative operators bind tighter, and atoms bind tightest. Both
// binary layers are left-recursive, so they associate to the left. The
// location recorded for a binary operation is that of its operator token.
Expression: Expression = {
    AdditiveExpression,
}

AdditiveExpression: Expression = {
    <lhs:AdditiveExpression> <at:@L> "+" <rhs:MultiplicativeExpression> =>
        Expression::Primitive(Location::new(file_idx, at), "+".to_string(), vec![lhs, rhs]),
    <lhs:AdditiveExpression> <at:@L> "-" <rhs:MultiplicativeExpression> =>
        Expression::Primitive(Location::new(file_idx, at), "-".to_string(), vec![lhs, rhs]),
    MultiplicativeExpression,
}

MultiplicativeExpression: Expression = {
    <lhs:MultiplicativeExpression> <at:@L> "*" <rhs:AtomicExpression> =>
        Expression::Primitive(Location::new(file_idx, at), "*".to_string(), vec![lhs, rhs]),
    <lhs:MultiplicativeExpression> <at:@L> "/" <rhs:AtomicExpression> =>
        Expression::Primitive(Location::new(file_idx, at), "/".to_string(), vec![lhs, rhs]),
    AtomicExpression,
}

// Atoms: a variable reference, a number, a negated number literal, or a
// parenthesized expression.
AtomicExpression: Expression = {
    <at:@L> <name:"<var>"> =>
        Expression::Reference(Location::new(file_idx, at), name.to_string()),
    <at:@L> <num:"<num>"> =>
        Expression::Value(Location::new(file_idx, at), Value::Number(num.0, num.1)),
    // a leading minus on a literal is folded into the literal itself
    <at:@L> "-" <num:"<num>"> =>
        Expression::Value(Location::new(file_idx, at), Value::Number(num.0, -num.1)),
    "(" <inner:Expression> ")" => inner,
}
|
||||
115
src/syntax/pretty.rs
Normal file
115
src/syntax/pretty.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
use crate::syntax::ast::{Expression, Program, Statement, Value, BINARY_OPERATORS};
|
||||
use pretty::{DocAllocator, DocBuilder, Pretty};
|
||||
|
||||
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Program
|
||||
where
|
||||
A: 'a,
|
||||
D: ?Sized + DocAllocator<'a, A>,
|
||||
{
|
||||
fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> {
|
||||
let mut result = allocator.nil();
|
||||
|
||||
for stmt in self.statements.iter() {
|
||||
result = result
|
||||
.append(stmt.pretty(allocator))
|
||||
.append(allocator.text(";"))
|
||||
.append(allocator.hardline());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Statement
|
||||
where
|
||||
A: 'a,
|
||||
D: ?Sized + DocAllocator<'a, A>,
|
||||
{
|
||||
fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> {
|
||||
match self {
|
||||
Statement::Binding(_, var, expr) => allocator
|
||||
.text(var.to_string())
|
||||
.append(allocator.space())
|
||||
.append(allocator.text("="))
|
||||
.append(allocator.space())
|
||||
.append(expr.pretty(allocator)),
|
||||
Statement::Print(_, var) => allocator
|
||||
.text("print")
|
||||
.append(allocator.space())
|
||||
.append(allocator.text(var.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Expression
|
||||
where
|
||||
A: 'a,
|
||||
D: ?Sized + DocAllocator<'a, A>,
|
||||
{
|
||||
fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> {
|
||||
match self {
|
||||
Expression::Value(_, val) => val.pretty(allocator),
|
||||
Expression::Reference(_, var) => allocator.text(var.to_string()),
|
||||
Expression::Primitive(_, op, exprs) if BINARY_OPERATORS.contains(&op.as_ref()) => {
|
||||
assert_eq!(
|
||||
exprs.len(),
|
||||
2,
|
||||
"Found binary operator with {} components?",
|
||||
exprs.len()
|
||||
);
|
||||
|
||||
let left = exprs[0].pretty(allocator);
|
||||
let right = exprs[1].pretty(allocator);
|
||||
|
||||
left.append(allocator.space())
|
||||
.append(allocator.text(op.to_string()))
|
||||
.append(allocator.space())
|
||||
.append(right)
|
||||
.parens()
|
||||
}
|
||||
Expression::Primitive(_, op, exprs) => {
|
||||
let call = allocator.text(op.to_string());
|
||||
let args = exprs.iter().map(|x| x.pretty(allocator));
|
||||
let comma_sepped_args = allocator.intersperse(args, CommaSep {});
|
||||
call.append(comma_sepped_args.parens())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, 'b, D, A> Pretty<'a, D, A> for &'b Value
|
||||
where
|
||||
A: 'a,
|
||||
D: ?Sized + DocAllocator<'a, A>,
|
||||
{
|
||||
fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> {
|
||||
match self {
|
||||
Value::Number(opt_base, value) => {
|
||||
let sign = if *value < 0 { "-" } else { "" };
|
||||
let value_str = match opt_base {
|
||||
None => format!("{}", value),
|
||||
Some(2) => format!("{}0b{:b}", sign, value.abs()),
|
||||
Some(8) => format!("{}0o{:o}", sign, value.abs()),
|
||||
Some(10) => format!("{}0d{}", sign, value.abs()),
|
||||
Some(16) => format!("{}0x{:x}", sign, value.abs()),
|
||||
Some(_) => format!("!!{}{:x}!!", sign, value.abs()),
|
||||
};
|
||||
|
||||
allocator.text(value_str)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct CommaSep {}
|
||||
|
||||
impl<'a, D, A> Pretty<'a, D, A> for CommaSep
|
||||
where
|
||||
A: 'a,
|
||||
D: ?Sized + DocAllocator<'a, A>,
|
||||
{
|
||||
fn pretty(self, allocator: &'a D) -> DocBuilder<'a, D, A> {
|
||||
allocator.text(",").append(allocator.space())
|
||||
}
|
||||
}
|
||||
63
src/syntax/simplify.rs
Normal file
63
src/syntax/simplify.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
use crate::syntax::ast::{Expression, Program, Statement};
|
||||
|
||||
impl Program {
|
||||
pub fn simplify(mut self) -> Self {
|
||||
let mut new_statements = Vec::new();
|
||||
let mut gensym_index = 1;
|
||||
|
||||
for stmt in self.statements.drain(..) {
|
||||
new_statements.append(&mut stmt.simplify(&mut gensym_index));
|
||||
}
|
||||
|
||||
self.statements = new_statements;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Statement {
|
||||
pub fn simplify(self, gensym_index: &mut usize) -> Vec<Statement> {
|
||||
let mut new_statements = vec![];
|
||||
|
||||
match self {
|
||||
Statement::Print(_, _) => new_statements.push(self),
|
||||
Statement::Binding(_, _, Expression::Reference(_, _)) => new_statements.push(self),
|
||||
Statement::Binding(_, _, Expression::Value(_, _)) => new_statements.push(self),
|
||||
Statement::Binding(loc, name, value) => {
|
||||
let (mut prereqs, new_value) = value.rebind(&name, gensym_index);
|
||||
new_statements.append(&mut prereqs);
|
||||
new_statements.push(Statement::Binding(loc, name, new_value))
|
||||
}
|
||||
}
|
||||
|
||||
new_statements
|
||||
}
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
fn rebind(self, base_name: &str, gensym_index: &mut usize) -> (Vec<Statement>, Expression) {
|
||||
match self {
|
||||
Expression::Value(_, _) => (vec![], self),
|
||||
Expression::Reference(_, _) => (vec![], self),
|
||||
Expression::Primitive(loc, prim, mut expressions) => {
|
||||
let mut prereqs = Vec::new();
|
||||
let mut new_exprs = Vec::new();
|
||||
|
||||
for expr in expressions.drain(..) {
|
||||
let (mut cur_prereqs, arg) = expr.rebind(base_name, gensym_index);
|
||||
prereqs.append(&mut cur_prereqs);
|
||||
new_exprs.push(arg);
|
||||
}
|
||||
|
||||
let new_name = format!("<{}:{}>", base_name, *gensym_index);
|
||||
*gensym_index += 1;
|
||||
prereqs.push(Statement::Binding(
|
||||
loc.clone(),
|
||||
new_name.clone(),
|
||||
Expression::Primitive(loc.clone(), prim, new_exprs),
|
||||
));
|
||||
|
||||
(prereqs, Expression::Reference(loc, new_name))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
117
src/syntax/token_stream.rs
Normal file
117
src/syntax/token_stream.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
use crate::syntax::tokens::Token;
|
||||
use logos::{Logos, SpannedIter};
|
||||
use std::fmt;
|
||||
use thiserror::Error;
|
||||
|
||||
pub struct TokenStream<'s> {
|
||||
file_idx: usize,
|
||||
lexer: SpannedIter<'s, Token>,
|
||||
}
|
||||
|
||||
impl<'s> TokenStream<'s> {
|
||||
pub fn new(file_idx: usize, s: &'s str) -> TokenStream<'s> {
|
||||
TokenStream {
|
||||
file_idx,
|
||||
lexer: Token::lexer(s).spanned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Where a token came from: a file index and offset, or nowhere in particular
/// for manufactured items.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Location {
    /// File index and offset within that file.
    InFile(usize, usize),
    /// Not tied to any source text.
    Manufactured,
}

/// Formats as `file:offset`, or `<manufactured>`.
impl fmt::Display for Location {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Location::InFile(s, off) => write!(f, "{}:{}", s, off),
            Location::Manufactured => f.write_str("<manufactured>"),
        }
    }
}

impl Location {
    /// Builds an in-file location.
    fn new(file_idx: usize, offset: usize) -> Location {
        Location::InFile(file_idx, offset)
    }
}

/// The default location is the manufactured one.
impl Default for Location {
    fn default() -> Self {
        Location::Manufactured
    }
}
|
||||
|
||||
type LocatedToken = Result<(Location, Token, Location), LexerError>;
|
||||
|
||||
impl<'s> Iterator for TokenStream<'s> {
|
||||
type Item = LocatedToken;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.lexer.next() {
|
||||
None => None,
|
||||
Some((Token::Error, span)) => Some(Err(LexerError::new(self.file_idx, span.start))),
|
||||
Some((token, span)) => {
|
||||
let start = Location::new(self.file_idx, span.start);
|
||||
let end = Location::new(self.file_idx, span.end);
|
||||
Some(Ok((start, token, end)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tokens come out in order with the right spans, and a trailing comment
/// produces nothing.
#[test]
fn stream_works() {
    let fidx = 42;
    let mut lex0 = TokenStream::new(fidx, "y = x + 1//foo");

    // (start offset, token, end offset)
    let expected = [
        (0, Token::var("y"), 1),
        (2, Token::Equals, 3),
        (4, Token::var("x"), 5),
        (6, Token::Operator('+'), 7),
        (8, Token::Number((None, 1)), 9),
    ];

    for (start, token, end) in expected {
        assert_eq!(
            lex0.next(),
            Some(Ok((
                Location::new(fidx, start),
                token,
                Location::new(fidx, end)
            )))
        );
    }

    assert_eq!(lex0.next(), None);
}
|
||||
|
||||
/// A character the lexer does not recognize yields an error at its offset.
#[test]
fn errors_work() {
    let fidx = 2;
    let mut lex0 = TokenStream::new(fidx, "\u{2639}");

    let first = lex0.next();
    assert_eq!(first, Some(Err(LexerError::new(fidx, 0))));
}
|
||||
123
src/syntax/tokens.rs
Normal file
123
src/syntax/tokens.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use internment::ArcIntern;
|
||||
use logos::{Lexer, Logos};
|
||||
use std::fmt;
|
||||
use std::num::ParseIntError;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Logos, Clone, Debug, PartialEq, Eq)]
|
||||
pub enum Token {
|
||||
#[token("=")]
|
||||
Equals,
|
||||
|
||||
#[token(";")]
|
||||
Semi,
|
||||
|
||||
#[token("(")]
|
||||
LeftParen,
|
||||
|
||||
#[token(")")]
|
||||
RightParen,
|
||||
|
||||
#[token("print")]
|
||||
Print,
|
||||
|
||||
#[regex(r"[+\-*/]", |v| v.slice().chars().next())]
|
||||
Operator(char),
|
||||
|
||||
#[regex(r"0b[01]+", |v| parse_number(Some(2), v))]
|
||||
#[regex(r"0o[0-7]+", |v| parse_number(Some(8), v))]
|
||||
#[regex(r"0d[0-9]+", |v| parse_number(Some(10), v))]
|
||||
#[regex(r"0x[0-9a-fA-F]+", |v| parse_number(Some(16), v))]
|
||||
#[regex(r"[0-9]+", |v| parse_number(None, v))]
|
||||
Number((Option<u8>, i64)),
|
||||
|
||||
#[regex(r"[a-z][a-zA-Z0-9_]*", |v| ArcIntern::new(v.slice().to_string()))]
|
||||
Variable(ArcIntern<String>),
|
||||
|
||||
#[error]
|
||||
#[regex(r"[ \t\r\n\f]+", logos::skip)]
|
||||
#[regex(r"//.*", logos::skip)]
|
||||
Error,
|
||||
}
|
||||
|
||||
impl fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Token::Equals => write!(f, "'='"),
|
||||
Token::Semi => write!(f, "';'"),
|
||||
Token::LeftParen => write!(f, "'('"),
|
||||
Token::RightParen => write!(f, "')'"),
|
||||
Token::Print => write!(f, "'print'"),
|
||||
Token::Operator(c) => write!(f, "'{}'", c),
|
||||
Token::Number((None, v)) => write!(f, "'{}'", v),
|
||||
Token::Number((Some(2), v)) => write!(f, "'0b{:b}'", v),
|
||||
Token::Number((Some(8), v)) => write!(f, "'0o{:o}'", v),
|
||||
Token::Number((Some(10), v)) => write!(f, "'{}'", v),
|
||||
Token::Number((Some(16), v)) => write!(f, "'0x{:x}'", v),
|
||||
Token::Number((Some(b), v)) => {
|
||||
write!(f, "Invalidly-based-number<base={},val={}>", b, v)
|
||||
}
|
||||
Token::Variable(s) => write!(f, "'{}'", s),
|
||||
Token::Error => write!(f, "<error>"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error, PartialEq, Eq)]
|
||||
pub enum LexerError {
|
||||
#[error("Failed lexing at {0}")]
|
||||
LexFailure(usize),
|
||||
}
|
||||
|
||||
#[cfg(test)]
impl Token {
    /// Test helper: builds a `Variable` token for the given name.
    pub(crate) fn var(s: &str) -> Token {
        let interned = ArcIntern::new(s.to_string());
        Token::Variable(interned)
    }
}
|
||||
|
||||
fn parse_number(
|
||||
base: Option<u8>,
|
||||
value: &Lexer<Token>,
|
||||
) -> Result<(Option<u8>, i64), ParseIntError> {
|
||||
let (radix, strval) = match base {
|
||||
None => (10, value.slice()),
|
||||
Some(radix) => (radix, &value.slice()[2..]),
|
||||
};
|
||||
|
||||
let intval = i64::from_str_radix(strval, radix as u32)?;
|
||||
Ok((base, intval))
|
||||
}
|
||||
|
||||
/// The same value written in every supported base lexes to the same number,
/// tagged with the base it was written in.
#[test]
fn lex_numbers() {
    let mut lex0 = Token::lexer("12 0b1100 0o14 0d12 0xc // 9");

    for base in [None, Some(2), Some(8), Some(10), Some(16)] {
        assert_eq!(lex0.next(), Some(Token::Number((base, 12))));
    }

    // the trailing comment contributes no tokens
    assert_eq!(lex0.next(), None);
}
|
||||
|
||||
/// Variables and operators lex correctly across mixed whitespace, and the
/// trailing comment is skipped.
#[test]
fn lex_symbols() {
    let mut lex0 = Token::lexer("x + \t y * \n z // rest");

    let expected = [
        Token::var("x"),
        Token::Operator('+'),
        Token::var("y"),
        Token::Operator('*'),
        Token::var("z"),
    ];

    for token in expected {
        assert_eq!(lex0.next(), Some(token));
    }

    assert_eq!(lex0.next(), None);
}
|
||||
|
||||
/// Each token is reported with the byte span it occupies in the input.
#[test]
fn lexer_spans() {
    let mut lex0 = Token::lexer("y = x + 1//foo").spanned();

    let expected = [
        (Token::var("y"), 0..1),
        (Token::Equals, 2..3),
        (Token::var("x"), 4..5),
        (Token::Operator('+'), 6..7),
        (Token::Number((None, 1)), 8..9),
    ];

    for spanned in expected {
        assert_eq!(lex0.next(), Some(spanned));
    }

    assert_eq!(lex0.next(), None);
}
|
||||
116
src/syntax/validate.rs
Normal file
116
src/syntax/validate.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
use crate::syntax::{Expression, Location, Program, Statement};
|
||||
use codespan_reporting::diagnostic::Diagnostic;
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub enum Error {
|
||||
UnboundVariable(Location, String),
|
||||
}
|
||||
|
||||
impl From<Error> for Diagnostic<usize> {
|
||||
fn from(x: Error) -> Self {
|
||||
match &x {
|
||||
Error::UnboundVariable(location, name) => location
|
||||
.labelled_error("unbound here")
|
||||
.with_message(format!("Unbound variable '{}'", name)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Warning {
|
||||
ShadowedVariable(Location, Location, String),
|
||||
}
|
||||
|
||||
impl From<Warning> for Diagnostic<usize> {
|
||||
fn from(x: Warning) -> Self {
|
||||
match &x {
|
||||
Warning::ShadowedVariable(original, new, name) => Diagnostic::warning()
|
||||
.with_labels(vec![
|
||||
new.primary_label().with_message("variable rebound here"),
|
||||
original
|
||||
.secondary_label()
|
||||
.with_message("original binding site"),
|
||||
])
|
||||
.with_message(format!("Variable '{}' is rebound", name)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Program {
|
||||
pub fn validate(&self) -> (Vec<Error>, Vec<Warning>) {
|
||||
let mut errors = vec![];
|
||||
let mut warnings = vec![];
|
||||
let mut bound_variables = HashMap::new();
|
||||
|
||||
for stmt in self.statements.iter() {
|
||||
let (mut new_errors, mut new_warnings) = stmt.validate(&mut bound_variables);
|
||||
errors.append(&mut new_errors);
|
||||
warnings.append(&mut new_warnings);
|
||||
}
|
||||
|
||||
(errors, warnings)
|
||||
}
|
||||
}
|
||||
|
||||
impl Statement {
|
||||
pub fn validate(
|
||||
&self,
|
||||
bound_variables: &mut HashMap<String, Location>,
|
||||
) -> (Vec<Error>, Vec<Warning>) {
|
||||
let mut errors = vec![];
|
||||
let mut warnings = vec![];
|
||||
|
||||
match self {
|
||||
Statement::Binding(loc, var, val) => {
|
||||
// we're going to make the decision that a variable is not bound in the right
|
||||
// hand side of its binding, which makes a lot of things easier. So we'll just
|
||||
// immediately check the expression, and go from there.
|
||||
let (mut exp_errors, mut exp_warnings) = val.validate(bound_variables);
|
||||
|
||||
errors.append(&mut exp_errors);
|
||||
warnings.append(&mut exp_warnings);
|
||||
if let Some(original_binding_site) = bound_variables.get(var) {
|
||||
warnings.push(Warning::ShadowedVariable(
|
||||
original_binding_site.clone(),
|
||||
loc.clone(),
|
||||
var.clone(),
|
||||
));
|
||||
} else {
|
||||
bound_variables.insert(var.clone(), loc.clone());
|
||||
}
|
||||
}
|
||||
|
||||
Statement::Print(_, var) if bound_variables.contains_key(var) => {}
|
||||
Statement::Print(loc, var) => {
|
||||
errors.push(Error::UnboundVariable(loc.clone(), var.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
(errors, warnings)
|
||||
}
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
fn validate(&self, variable_map: &HashMap<String, Location>) -> (Vec<Error>, Vec<Warning>) {
|
||||
match self {
|
||||
Expression::Value(_, _) => (vec![], vec![]),
|
||||
Expression::Reference(_, var) if variable_map.contains_key(var) => (vec![], vec![]),
|
||||
Expression::Reference(loc, var) => (
|
||||
vec![Error::UnboundVariable(loc.clone(), var.clone())],
|
||||
vec![],
|
||||
),
|
||||
Expression::Primitive(_, _, args) => {
|
||||
let mut errors = vec![];
|
||||
let mut warnings = vec![];
|
||||
|
||||
for expr in args.iter() {
|
||||
let (mut err, mut warn) = expr.validate(variable_map);
|
||||
errors.append(&mut err);
|
||||
warnings.append(&mut warn);
|
||||
}
|
||||
|
||||
(errors, warnings)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user