Let locations be ranges, not just specific indexes.

This commit is contained in:
2023-07-22 14:50:06 -07:00
parent a8d32a917f
commit 833c9d5350
9 changed files with 141 additions and 101 deletions

View File

@@ -134,10 +134,10 @@ impl REPL {
// if this is a variable binding, and we've never defined this variable before,
// we should tell cranelift about it. this is optimistic; if we fail to compile,
// then we won't use this definition until someone tries again.
if !self.variable_binding_sites.contains_key(&name) {
self.jitter.define_string(&name)?;
if !self.variable_binding_sites.contains_key(&name.name) {
self.jitter.define_string(&name.name)?;
self.jitter
.define_variable(name.clone(), ConstantType::U64)?;
.define_variable(name.to_string(), ConstantType::U64)?;
}
crate::syntax::Program {

View File

@@ -73,12 +73,12 @@ pub enum ParserError {
/// Raised when we're parsing the file, and run into a token in a
/// place we weren't expecting it.
#[error("Unrecognized token")]
UnrecognizedToken(Location, Location, Token, Vec<String>),
UnrecognizedToken(Location, Token, Vec<String>),
/// Raised when we were expecting the end of the file, but instead
/// got another token.
#[error("Extra token")]
ExtraToken(Location, Token, Location),
ExtraToken(Location, Token),
/// Raised when the lexer just had some sort of internal problem
/// and just gave up.
@@ -106,30 +106,28 @@ impl ParserError {
fn convert(file_idx: usize, err: ParseError<usize, Token, LexerError>) -> Self {
match err {
ParseError::InvalidToken { location } => {
ParserError::InvalidToken(Location::new(file_idx, location))
ParserError::InvalidToken(Location::new(file_idx, location..location+1))
}
ParseError::UnrecognizedEof { location, expected } => {
ParserError::UnrecognizedEOF(Location::new(file_idx, location), expected)
ParserError::UnrecognizedEOF(Location::new(file_idx, location..location+1), expected)
}
ParseError::UnrecognizedToken {
token: (start, token, end),
expected,
} => ParserError::UnrecognizedToken(
Location::new(file_idx, start),
Location::new(file_idx, end),
Location::new(file_idx, start..end),
token,
expected,
),
ParseError::ExtraToken {
token: (start, token, end),
} => ParserError::ExtraToken(
Location::new(file_idx, start),
Location::new(file_idx, start..end),
token,
Location::new(file_idx, end),
),
ParseError::User { error } => match error {
LexerError::LexFailure(offset) => {
ParserError::LexFailure(Location::new(file_idx, offset))
ParserError::LexFailure(Location::new(file_idx, offset..offset+1))
}
},
}
@@ -180,37 +178,29 @@ impl<'a> From<&'a ParserError> for Diagnostic<usize> {
),
// encountered a token where it shouldn't be
ParserError::UnrecognizedToken(start, end, token, expected) => {
ParserError::UnrecognizedToken(loc, token, expected) => {
let expected_str =
format!("unexpected token {}{}", token, display_expected(expected));
let unexpected_str = format!("unexpected token {}", token);
let labels = start.range_label(end);
Diagnostic::error()
.with_labels(
labels
.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
.with_message(expected_str)
.with_labels(vec![
loc.primary_label().with_message(unexpected_str)
])
}
// I think we get this when we get a token, but were expecting EOF
ParserError::ExtraToken(start, token, end) => {
ParserError::ExtraToken(loc, token) => {
let expected_str =
format!("unexpected token {} after the expected end of file", token);
let unexpected_str = format!("unexpected token {}", token);
let labels = start.range_label(end);
Diagnostic::error()
.with_labels(
labels
.into_iter()
.map(|l| l.with_message(unexpected_str.clone()))
.collect(),
)
.with_message(expected_str)
.with_labels(vec![
loc.primary_label().with_message(unexpected_str)
])
}
// simple lexer errors
@@ -293,23 +283,23 @@ fn order_of_operations() {
Program::from_str(muladd1).unwrap(),
Program {
statements: vec![Statement::Binding(
Location::new(testfile, 0),
"x".to_string(),
Location::new(testfile, 0..1),
Name::manufactured("x"),
Expression::Primitive(
Location::new(testfile, 6),
Location::new(testfile, 6..7),
"+".to_string(),
vec![
Expression::Value(Location::new(testfile, 4), Value::Number(None, None, 1),),
Expression::Value(Location::new(testfile, 4..5), Value::Number(None, None, 1),),
Expression::Primitive(
Location::new(testfile, 10),
Location::new(testfile, 10..11),
"*".to_string(),
vec![
Expression::Value(
Location::new(testfile, 8),
Location::new(testfile, 8..9),
Value::Number(None, None, 2),
),
Expression::Value(
Location::new(testfile, 12),
Location::new(testfile, 12..13),
Value::Number(None, None, 3),
),
]

View File

@@ -1,4 +1,4 @@
use crate::syntax::ast::{ConstantType, Expression, Program, Statement, Value};
use crate::syntax::ast::{ConstantType, Expression, Name, Program, Statement, Value};
use crate::syntax::location::Location;
use proptest::sample::select;
use proptest::{
@@ -10,15 +10,12 @@ use std::collections::HashMap;
const VALID_VARIABLE_NAMES: &str = r"[a-z][a-zA-Z0-9_]*";
const OPERATORS: &[(&str, usize)] = &[("+", 2), ("-", 1), ("-", 2), ("*", 2), ("/", 2)];
#[derive(Clone, Debug)]
struct Name(String);
impl Arbitrary for Name {
type Parameters = ();
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(_: Self::Parameters) -> Self::Strategy {
VALID_VARIABLE_NAMES.prop_map(Name).boxed()
VALID_VARIABLE_NAMES.prop_map(Name::manufactured).boxed()
}
}
@@ -67,12 +64,12 @@ impl Arbitrary for Program {
output_type: Some(psi.binding_type),
});
defined_variables.insert(psi.name.0.clone(), psi.binding_type);
defined_variables.insert(psi.name.name.clone(), psi.binding_type);
statements.push(
expr.prop_map(move |expr| {
Statement::Binding(
Location::manufactured(),
psi.name.0.clone(),
psi.name.clone(),
expr,
)
})
@@ -81,7 +78,7 @@ impl Arbitrary for Program {
} else {
let printers = defined_variables
.keys()
.map(|n| Just(Statement::Print(Location::manufactured(), n.clone())));
.map(|n| Just(Statement::Print(Location::manufactured(), Name::manufactured(n))));
statements.push(Union::new(printers).boxed());
}
}

View File

@@ -1,3 +1,8 @@
use std::fmt;
use std::hash::Hash;
use internment::ArcIntern;
pub use crate::syntax::tokens::ConstantType;
use crate::syntax::Location;
@@ -14,6 +19,50 @@ pub struct Program {
pub statements: Vec<Statement>,
}
/// A Name.
///
/// This is basically a string, but annotated with the place the string
/// is in the source file.
///
/// Equality and hashing for `Name` are implemented by hand and look only at
/// the `name` field, so two names with the same text but different locations
/// compare equal and hash identically.
#[derive(Clone, Debug)]
pub struct Name {
/// The text of the name.
pub name: String,
/// Where this name appears in the source file.
pub location: Location,
}
impl Name {
    /// Build a `Name` from its text and the location it was found at.
    ///
    /// Anything implementing `ToString` is accepted; it is converted into an
    /// owned `String` that the new `Name` stores.
    pub fn new<S: ToString>(n: S, location: Location) -> Name {
        let name = n.to_string();
        Name { name, location }
    }

    /// Build a `Name` that does not come from any source text.
    ///
    /// The location is whatever `Location::manufactured()` returns.
    pub fn manufactured<S: ToString>(n: S) -> Name {
        let name = n.to_string();
        let location = Location::manufactured();
        Name { name, location }
    }

    /// Consume this `Name` and intern its text.
    ///
    /// Only the text is handed to `ArcIntern::new`; the location is dropped.
    pub fn intern(self) -> ArcIntern<String> {
        let Name { name, .. } = self;
        ArcIntern::new(name)
    }
}
/// Names are compared by their text alone; the location is ignored.
impl PartialEq for Name {
    fn eq(&self, rhs: &Self) -> bool {
        let (mine, theirs) = (self.name.as_str(), rhs.name.as_str());
        mine == theirs
    }
}

/// Text equality is a full equivalence relation, so `Eq` holds as well.
impl Eq for Name {}
/// Hash only the text of the name, never its location.
impl Hash for Name {
    fn hash<H>(&self, hasher: &mut H)
    where
        H: std::hash::Hasher,
    {
        // Feed exactly what hashing the underlying `String` would feed.
        Hash::hash(&self.name, hasher);
    }
}
/// A `Name` displays as its bare text, with no location information.
impl fmt::Display for Name {
    /// Write the name's text to `f`.
    ///
    /// The call is forwarded to `String`'s own `Display` implementation, so
    /// width, fill and alignment flags in the format spec are honoured.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Name the trait explicitly: method-call syntax (`self.name.fmt(f)`)
        // only resolves when a `fmt` trait is imported, and becomes ambiguous
        // when both `Display` and `Debug` are in scope.
        fmt::Display::fmt(&self.name, f)
    }
}
/// A parsed statement.
///
/// Statements are guaranteed to be syntactically valid, but may be
@@ -27,8 +76,8 @@ pub struct Program {
/// thing, not if they are the exact same statement.
#[derive(Clone, Debug)]
pub enum Statement {
Binding(Location, String, Expression),
Print(Location, String),
Binding(Location, Name, Expression),
Print(Location, Name),
}
impl PartialEq for Statement {

View File

@@ -25,11 +25,11 @@ impl Program {
match stmt {
Statement::Binding(_, name, value) => {
let actual_value = value.eval(&env)?;
env = env.extend(ArcIntern::new(name.clone()), actual_value);
env = env.extend(name.clone().intern(), actual_value);
}
Statement::Print(_, name) => {
let value = env.lookup(ArcIntern::new(name.clone()))?;
let value = env.lookup(name.clone().intern())?;
let line = format!("{} = {}\n", name, value);
stdout.push_str(&line);
}

View File

@@ -1,3 +1,5 @@
use std::ops::Range;
use codespan_reporting::diagnostic::{Diagnostic, Label};
/// A source location, for use in pointing users towards warnings and errors.
@@ -7,7 +9,7 @@ use codespan_reporting::diagnostic::{Diagnostic, Label};
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Location {
file_idx: usize,
offset: usize,
location: Range<usize>,
}
impl Location {
@@ -17,8 +19,8 @@ impl Location {
/// The file index is based on the file database being used. See the
/// `codespan_reporting::files::SimpleFiles::add` function, which is
/// normally where we get this index.
pub fn new(file_idx: usize, offset: usize) -> Self {
Location { file_idx, offset }
pub fn new(file_idx: usize, location: Range<usize>) -> Self {
Location { file_idx, location }
}
/// Generate a `Location` for a completely manufactured bit of code.
@@ -30,7 +32,7 @@ impl Location {
pub fn manufactured() -> Self {
Location {
file_idx: 0,
offset: 0,
location: 0..0,
}
}
@@ -47,7 +49,7 @@ impl Location {
/// actually happened), but you'd probably want to make the first location
/// the secondary label to help users find it.
pub fn primary_label(&self) -> Label<usize> {
Label::primary(self.file_idx, self.offset..self.offset)
Label::primary(self.file_idx, self.location.clone())
}
/// Generate a secondary label for a [`Diagnostic`], based on this source
@@ -64,35 +66,7 @@ impl Location {
/// probably want to make the first location the secondary label to help
/// users find it.
pub fn secondary_label(&self) -> Label<usize> {
Label::secondary(self.file_idx, self.offset..self.offset)
}
/// Given this location and another, generate a primary label that
/// specifies the area between those two locations.
///
/// See [`Self::primary_label`] for some discussion of primary versus
/// secondary labels. If the two locations are the same, this method does
/// the exact same thing as [`Self::primary_label`]. If this item was
/// generated by [`Self::manufactured`], it will act as if you'd called
/// `primary_label` on the argument. Otherwise, it will generate the obvious
/// span.
///
/// This function will return `None` only in the case that you provide
/// labels from two different files, which it cannot sensibly handle.
pub fn range_label(&self, end: &Location) -> Option<Label<usize>> {
if self.file_idx == 0 {
return Some(end.primary_label());
}
if self.file_idx != end.file_idx {
return None;
}
if self.offset > end.offset {
Some(Label::primary(self.file_idx, end.offset..self.offset))
} else {
Some(Label::primary(self.file_idx, self.offset..end.offset))
}
Label::secondary(self.file_idx, self.location.clone())
}
/// Return an error diagnostic centered at this location.
@@ -104,7 +78,7 @@ impl Location {
pub fn error(&self) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx,
self.offset..self.offset,
self.location.clone(),
)])
}
@@ -117,8 +91,23 @@ impl Location {
pub fn labelled_error(&self, msg: &str) -> Diagnostic<usize> {
Diagnostic::error().with_labels(vec![Label::primary(
self.file_idx,
self.offset..self.offset,
self.location.clone(),
)
.with_message(msg)])
}
/// Combine this location with `other` into one location covering both.
///
/// The result starts at the smaller of the two start offsets and ends at the
/// larger of the two end offsets, so any gap between the two locations is
/// covered as well.
///
/// This function returns `None` if the locations are from different files;
/// this can happen if one of the locations is manufactured, for example.
///
/// NOTE(review): manufactured locations use file index 0, so merging one with
/// a real location whose file index is also 0 will not return `None` --
/// confirm whether callers rely on that.
pub fn merge(&self, other: &Self) -> Option<Self> {
    if self.file_idx != other.file_idx {
        return None;
    }

    let start = self.location.start.min(other.location.start);
    let end = self.location.end.max(other.location.end);
    Some(Location {
        file_idx: self.file_idx,
        location: start..end,
    })
}
}

View File

@@ -9,7 +9,7 @@
//! eventually want to leave lalrpop behind.)
//!
use crate::syntax::{LexerError, Location};
use crate::syntax::ast::{Program,Statement,Expression,Value};
use crate::syntax::ast::{Program,Statement,Expression,Value,Name};
use crate::syntax::tokens::{ConstantType, Token};
use internment::ArcIntern;
@@ -91,10 +91,19 @@ pub Statement: Statement = {
// A statement can be a variable binding. Note, here, that we use this
// funny @L thing to get the source location before the variable, so that
// we can say that this statement spans across everything.
<l:@L> <v:"<var>"> "=" <e:Expression> ";" => Statement::Binding(Location::new(file_idx, l), v.to_string(), e),
<ls: @L> <v:"<var>"> <var_end: @L> "=" <e:Expression> ";" <le: @L> =>
Statement::Binding(
Location::new(file_idx, ls..le),
Name::new(v, Location::new(file_idx, ls..var_end)),
e,
),
// Alternatively, a statement can just be a print statement.
"print" <l:@L> <v:"<var>"> ";" => Statement::Print(Location::new(file_idx, l), v.to_string()),
<ls: @L> "print" <name_start: @L> <v:"<var>"> <name_end: @L> ";" <le: @L> =>
Statement::Print(
Location::new(file_idx, ls..le),
Name::new(v, Location::new(file_idx, name_start..name_end)),
),
}
// Expressions! Expressions are a little fiddly, because we're going to
@@ -126,21 +135,27 @@ Expression: Expression = {
// we group addition and subtraction under the heading "additive"
AdditiveExpression: Expression = {
<e1:AdditiveExpression> <l:@L> "+" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "+".to_string(), vec![e1, e2]),
<e1:AdditiveExpression> <l:@L> "-" <e2:MultiplicativeExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e1, e2]),
<ls: @L> <e1:AdditiveExpression> <l: @L> "+" <e2:MultiplicativeExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "+".to_string(), vec![e1, e2]),
<ls: @L> <e1:AdditiveExpression> <l: @L> "-" <e2:MultiplicativeExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "-".to_string(), vec![e1, e2]),
MultiplicativeExpression,
}
// similarly, we group multiplication and division under "multiplicative"
MultiplicativeExpression: Expression = {
<e1:MultiplicativeExpression> <l:@L> "*" <e2:UnaryExpression> => Expression::Primitive(Location::new(file_idx, l), "*".to_string(), vec![e1, e2]),
<e1:MultiplicativeExpression> <l:@L> "/" <e2:UnaryExpression> => Expression::Primitive(Location::new(file_idx, l), "/".to_string(), vec![e1, e2]),
<ls: @L> <e1:MultiplicativeExpression> <l: @L> "*" <e2:UnaryExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "*".to_string(), vec![e1, e2]),
<ls: @L> <e1:MultiplicativeExpression> <l: @L> "/" <e2:UnaryExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, ls..le), "/".to_string(), vec![e1, e2]),
UnaryExpression,
}
UnaryExpression: Expression = {
<l:@L> "-" <e:UnaryExpression> => Expression::Primitive(Location::new(file_idx, l), "-".to_string(), vec![e]),
<l:@L> "<" <v:"<var>"> ">" <e:UnaryExpression> => Expression::Cast(Location::new(file_idx, l), v.to_string(), Box::new(e)),
<l: @L> "-" <e:UnaryExpression> <le: @L> =>
Expression::Primitive(Location::new(file_idx, l..le), "-".to_string(), vec![e]),
<l: @L> "<" <v:"<var>"> ">" <e:UnaryExpression> <le: @L> =>
Expression::Cast(Location::new(file_idx, l..le), v.to_string(), Box::new(e)),
AtomicExpression,
}
@@ -148,9 +163,9 @@ UnaryExpression: Expression = {
// they cannot be further divided into parts
AtomicExpression: Expression = {
// just a variable reference
<l:@L> <v:"<var>"> => Expression::Reference(Location::new(file_idx, l), v.to_string()),
<l: @L> <v:"<var>"> <end: @L> => Expression::Reference(Location::new(file_idx, l..end), v.to_string()),
// just a number
<l:@L> <n:"<num>"> => Expression::Value(Location::new(file_idx, l), Value::Number(n.0, n.1, n.2)),
<l: @L> <n:"<num>"> <end: @L> => Expression::Value(Location::new(file_idx, l..end), Value::Number(n.0, n.1, n.2)),
// finally, let people parenthesize expressions and get back to a
// lower precedence
"(" <e:Expression> ")" => e,

View File

@@ -117,20 +117,20 @@ impl Statement {
errors.append(&mut exp_errors);
warnings.append(&mut exp_warnings);
if let Some(original_binding_site) = bound_variables.get(var) {
if let Some(original_binding_site) = bound_variables.get(&var.name) {
warnings.push(Warning::ShadowedVariable(
original_binding_site.clone(),
loc.clone(),
var.clone(),
var.to_string(),
));
} else {
bound_variables.insert(var.clone(), loc.clone());
bound_variables.insert(var.to_string(), loc.clone());
}
}
Statement::Print(_, var) if bound_variables.contains_key(var) => {}
Statement::Print(_, var) if bound_variables.contains_key(&var.name) => {}
Statement::Print(loc, var) => {
errors.push(Error::UnboundVariable(loc.clone(), var.clone()))
errors.push(Error::UnboundVariable(loc.clone(), var.to_string()))
}
}

View File

@@ -53,7 +53,7 @@ fn convert_statement(
) -> Vec<ir::Statement> {
match statement {
syntax::Statement::Print(loc, name) => {
let iname = ArcIntern::new(name);
let iname = ArcIntern::new(name.to_string());
let final_name = renames
.get(&iname)
.map(Clone::clone)
@@ -71,7 +71,7 @@ fn convert_statement(
syntax::Statement::Binding(loc, name, expr) => {
let (mut prereqs, expr, ty) =
convert_expression(expr, constraint_db, renames, bindings);
let iname = ArcIntern::new(name);
let iname = ArcIntern::new(name.to_string());
let final_name = if bindings.contains_key(&iname) {
let new_name = ir::gensym(iname.as_str());
renames.insert(iname, new_name.clone());