1786 lines
66 KiB
Rust
1786 lines
66 KiB
Rust
use crate::syntax::error::ParserError;
|
|
use crate::syntax::tokens::{Lexer, LocatedToken, Token};
|
|
use crate::syntax::*;
|
|
use internment::ArcIntern;
|
|
use std::collections::HashMap;
|
|
use std::ops::Range;
|
|
use std::path::{Path, PathBuf};
|
|
|
|
/// A parser for a particular file.
///
/// This parser should be used for exactly one file, and its lifetime
/// must be tied to the underlying lexer. However, after the parser is
/// done, the resultant object should have no lifetime links to the
/// original file, so it can be thrown away.
///
/// The parser includes information about operator precedence that is
/// stateful.
pub struct Parser<'lexer> {
    /// Interned path of the file being parsed; attached to locations and errors.
    file: ArcIntern<PathBuf>,
    /// Token source, consulted only once no pushed-back tokens remain.
    lexer: Lexer<'lexer>,
    /// Pushed-back tokens: `save` pushes onto this vector and `next` pops
    /// from it before asking the lexer, so the most recently saved token
    /// is returned first.
    known_tokens: Vec<LocatedToken>,
    /// Prefix operators, mapped to twice the level they were registered at.
    prefix_precedence_table: HashMap<String, u8>,
    /// Infix operators, mapped to a pair of values derived from their level
    /// and associativity (see `add_infix_precedence`).
    infix_precedence_table: HashMap<String, (u8, u8)>,
    /// Postfix operators, mapped to twice the level they were registered at.
    postfix_precedence_table: HashMap<String, u8>,
}
|
|
|
|
/// The directional associativity for an operator.
///
/// This directionality impacts whether (a + b + c) defaults to
/// ((a + b) + c) or (a + (b + c)). It does not affect situations
/// in which operator numeric precedence is different between
/// operators.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Associativity {
    /// Group towards the left: (a + b + c) reads as ((a + b) + c).
    Left,
    /// Group towards the right: (a + b + c) reads as (a + (b + c)).
    Right,
    /// No direction was declared for the operator.
    None,
}
|
|
|
|
/// The kind of operators we use. This is only narrowly useful inside
/// this particular crate.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum OperatorType {
    /// The operator is written before its operand.
    Prefix,
    /// The operator is written between two operands.
    Infix,
    /// The operator is written after its operand.
    Postfix,
}
|
|
|
|
impl<'lexer> Parser<'lexer> {
|
|
/// Create a new parser from the given file index and lexer.
|
|
///
|
|
/// The file index will be used for annotating locations and for
|
|
/// error messages. If you don't care about either, you can use
|
|
/// 0 with no loss of functionality. (Obviously, it will be harder
|
|
/// to create quality error messages, but you already knew that.)
|
|
pub fn new<P: AsRef<Path>>(file: P, lexer: Lexer<'lexer>) -> Parser<'lexer> {
|
|
Parser {
|
|
file: ArcIntern::new(file.as_ref().to_path_buf()),
|
|
lexer,
|
|
known_tokens: vec![],
|
|
prefix_precedence_table: HashMap::new(),
|
|
infix_precedence_table: HashMap::new(),
|
|
postfix_precedence_table: HashMap::new(),
|
|
}
|
|
}
|
|
|
|
/// Add the given operator to our precedence table, at the given
|
|
/// precedence level and associativity.
|
|
///
|
|
/// This is used for infix operators, only.
|
|
pub fn add_infix_precedence<S: ToString>(
|
|
&mut self,
|
|
operator: S,
|
|
associativity: Associativity,
|
|
level: u8,
|
|
) {
|
|
let actual_associativity = match associativity {
|
|
Associativity::Left => (level * 2, (level * 2) + 1),
|
|
Associativity::Right => ((level * 2) + 1, level * 2),
|
|
Associativity::None => (level * 2, level * 2),
|
|
};
|
|
|
|
self.infix_precedence_table
|
|
.insert(operator.to_string(), actual_associativity);
|
|
}
|
|
|
|
/// Add the given operator to our precedence table, at the given
|
|
/// precedence level and associativity.
|
|
///
|
|
/// This is used for prefix operators, only.
|
|
pub fn add_prefix_precedence<S: ToString>(&mut self, operator: S, level: u8) {
|
|
self.prefix_precedence_table
|
|
.insert(operator.to_string(), level * 2);
|
|
}
|
|
|
|
/// Add the given operator to our precedence table, at the given
|
|
/// precedence level and associativity.
|
|
///
|
|
/// This is used for postfix operators, only.
|
|
pub fn add_postfix_precedence<S: ToString>(&mut self, operator: S, level: u8) {
|
|
self.postfix_precedence_table
|
|
.insert(operator.to_string(), level * 2);
|
|
}
|
|
|
|
/// Get the precedence of the given operator.
|
|
///
|
|
/// FIXME: This currently only functions on infix operators, not
|
|
/// prefix and postfix. In general, this can all be cleaned up.
|
|
fn get_precedence(&self, name: &String) -> (u8, u8) {
|
|
match self.infix_precedence_table.get(name) {
|
|
None => (19, 20),
|
|
Some(x) => *x,
|
|
}
|
|
}
|
|
|
|
/// Get the next token from the input stream, or None if we're at
|
|
/// the end of a stream.
|
|
///
|
|
/// Ok(None) represents "we have reached the end of the stream", while
|
|
/// an Err(_) means that we ran into some sort of error (UTF-8 formatting,
|
|
/// lexing, IO, etc.) in reading the stream.
|
|
pub fn next(&mut self) -> Result<Option<LocatedToken>, ParserError> {
|
|
let result = self.known_tokens.pop();
|
|
|
|
if result.is_some() {
|
|
Ok(result)
|
|
} else {
|
|
self.lexer
|
|
.next()
|
|
.transpose()
|
|
.map_err(|error| ParserError::LexerError {
|
|
file: self.file.clone(),
|
|
error,
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Save the given token back to the top of the stream.
|
|
///
|
|
/// This is essentially an "undo" on next(), or an alternative path for
|
|
/// peeking at the next token in the stream.
|
|
fn save(&mut self, token: LocatedToken) {
|
|
self.known_tokens.push(token)
|
|
}
|
|
|
|
/// Get the location of the next token in the stream.
|
|
///
|
|
/// This will return an error if we're at the end of the file.
|
|
fn current_location(&mut self) -> Result<Location, ParserError> {
|
|
let current = self.next()?;
|
|
match current {
|
|
None => Err(self.bad_eof("trying to get current location")),
|
|
Some(token) => {
|
|
let retval = self.to_location(token.span.clone());
|
|
self.save(token);
|
|
Ok(retval)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Generate the parser error that should happen when we hit an EOF
|
|
/// in a bad place.
|
|
fn bad_eof<S: ToString>(&mut self, place: S) -> ParserError {
|
|
ParserError::UnacceptableEof {
|
|
file: self.file.clone(),
|
|
place: place.to_string(),
|
|
}
|
|
}
|
|
|
|
/// Convert an offset into a formal location that can be saved off
|
|
/// into ASTs.
|
|
fn to_location(&self, span: Range<usize>) -> Location {
|
|
Location::new(&self.file, span)
|
|
}
|
|
|
|
/// See if the next token is the keyword, as expected.
|
|
///
|
|
/// If it isn't, this routine will provide an error, but it will make
|
|
/// sure to put the token back into the stream.
|
|
fn require_keyword(&mut self, keyword: &'static str) -> Result<Location, ParserError> {
|
|
match self.next()? {
|
|
None => Err(self.bad_eof(format!("looking for keyword '{keyword}'"))),
|
|
Some(ltoken) => match ltoken.token {
|
|
Token::ValueName(s) if s.as_str() == keyword => Ok(self.to_location(ltoken.span)),
|
|
_ => {
|
|
self.save(ltoken.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: ltoken.span,
|
|
token: ltoken.token,
|
|
expected: format!("keyword {keyword}"),
|
|
})
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
/// See if the next token is an operator, as expected.
|
|
///
|
|
/// If it isn't, this routine will provide an error, but it will make
|
|
/// sure to put the token back into the stream.
|
|
fn require_operator(&mut self, op: &'static str) -> Result<Location, ParserError> {
|
|
match self.next()? {
|
|
None => Err(self.bad_eof(format!("looking for symbol '{op}'"))),
|
|
Some(ltoken) => match ltoken.token {
|
|
Token::OperatorName(s) if s.as_str() == op => Ok(self.to_location(ltoken.span)),
|
|
_ => {
|
|
self.save(ltoken.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: ltoken.span,
|
|
token: ltoken.token,
|
|
expected: format!("symbol {op}"),
|
|
})
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
/// See if the next token is the given one, as expected.
|
|
///
|
|
/// If it isn't, this routine will provide an error, but it will make
|
|
/// sure to put the token back into the stream.
|
|
fn require_token(
|
|
&mut self,
|
|
token: Token,
|
|
place: &'static str,
|
|
) -> Result<Location, ParserError> {
|
|
let message = || format!("looking for '{token}' in {place}");
|
|
let next = self.next()?.ok_or_else(|| self.bad_eof(message()))?;
|
|
|
|
if next.token != token {
|
|
self.save(next.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: message(),
|
|
})
|
|
} else {
|
|
Ok(self.to_location(next.span))
|
|
}
|
|
}
|
|
|
|
/// Parse the top level file associated with a Bang module.
|
|
///
|
|
/// This will expect to read until EOF, and will fail or stall
|
|
/// forever if there is no EOF, or the EOF ends in the wrong
|
|
/// place. So this should *not* be used for interactive sessions,
|
|
/// because those are unlikely to have EOFs in the appropriate
|
|
/// places.
|
|
pub fn parse_module(&mut self) -> Result<Module, ParserError> {
|
|
let mut definitions = vec![];
|
|
|
|
loop {
|
|
if let Some(next_token) = self.next()? {
|
|
self.save(next_token);
|
|
definitions.push(self.parse_definition()?);
|
|
} else {
|
|
return Ok(Module { definitions });
|
|
}
|
|
}
|
|
}
|
|
|
|
#[allow(unused)]
|
|
#[cfg(not(coverage))]
|
|
fn print_next_token(&mut self, comment: &str) {
|
|
let token = self.next().expect("can get token");
|
|
println!(
|
|
"[{comment}] next token will be {:?}",
|
|
token.as_ref().map(|x| x.token.clone())
|
|
);
|
|
if let Some(token) = token {
|
|
self.save(token);
|
|
}
|
|
}
|
|
|
|
/// Parse a definition in a file (structure, enumeration, value, etc.).
|
|
///
|
|
/// This will read a definition. If there's an error, it's very likely the
|
|
/// input stream will be corrupted, so you probably don't want to try to
|
|
/// recover. You can, obviously.
|
|
pub fn parse_definition(&mut self) -> Result<Definition, ParserError> {
|
|
let (export, start) = self.parse_export_class()?;
|
|
let type_restrictions = self.parse_type_restrictions()?;
|
|
let definition = self.parse_def()?;
|
|
let location = definition.location().extend_to(&start);
|
|
|
|
Ok(Definition {
|
|
location,
|
|
export,
|
|
type_restrictions,
|
|
definition,
|
|
})
|
|
}
|
|
|
|
/// Parse the export class for the current definition.
|
|
///
|
|
/// If there isn't an 'export' declaration, then this will return 'private',
|
|
/// because if it hasn't been declared exported then it's private. But this
|
|
/// does mean that a future parsing error will be assumed to be a private
|
|
/// declaration.
|
|
fn parse_export_class(&mut self) -> Result<(ExportClass, Location), ParserError> {
|
|
if let Ok(span) = self.require_keyword("export") {
|
|
Ok((ExportClass::Public, span))
|
|
} else {
|
|
let start = self.current_location()?;
|
|
Ok((ExportClass::Private, start))
|
|
}
|
|
}
|
|
|
|
/// Parse a type restriction and return it.
|
|
///
|
|
/// Like the export class parsing, parsing type restrictions has a clear
|
|
/// default (no restrictions) when the input doesn't lead with the appropriate
|
|
/// keyword. As a result, this can generate a result even in cases in which
|
|
/// the input is empty.
|
|
pub fn parse_type_restrictions(&mut self) -> Result<TypeRestrictions, ParserError> {
|
|
if self.require_keyword("restrict").is_err() {
|
|
return Ok(TypeRestrictions::empty());
|
|
}
|
|
let _ = self.require_token(Token::OpenParen, "type restriction")?;
|
|
|
|
let mut restrictions = vec![];
|
|
|
|
while let Some(type_restriction) = self.parse_type_restriction()? {
|
|
restrictions.push(type_restriction);
|
|
}
|
|
|
|
let _ = self.require_token(Token::CloseParen, "type restriction")?;
|
|
Ok(TypeRestrictions { restrictions })
|
|
}
|
|
|
|
/// Parse a single type retriction.
|
|
///
|
|
/// A type restriction should consist of a constructor token followed by
|
|
/// some number of arguments. We parse this in the obvious way, stopping
|
|
/// the input when we hit something that isn't a base type.
|
|
///
|
|
/// Note that, because of this, we might end up in a situation in which
|
|
/// we throw an error after consuming a bunch of input, meaning that it
|
|
/// will be impossible to recover.
|
|
fn parse_type_restriction(&mut self) -> Result<Option<TypeRestriction>, ParserError> {
|
|
let maybe_constructor = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("Looking for constructor for type restriction"))?;
|
|
|
|
let constructor = match maybe_constructor.token {
|
|
Token::TypeName(str) => {
|
|
let name = Name::new(self.to_location(maybe_constructor.span.clone()), str);
|
|
Type::Constructor(self.to_location(maybe_constructor.span), name)
|
|
}
|
|
Token::PrimitiveTypeName(str) => {
|
|
let name = Name::new(self.to_location(maybe_constructor.span.clone()), str);
|
|
Type::Primitive(self.to_location(maybe_constructor.span), name)
|
|
}
|
|
|
|
token @ Token::CloseParen | token @ Token::Comma => {
|
|
self.save(LocatedToken {
|
|
token,
|
|
span: maybe_constructor.span,
|
|
});
|
|
return Ok(None);
|
|
}
|
|
|
|
weird => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: maybe_constructor.span,
|
|
token: weird,
|
|
expected: "Constructor name, comma, or close parenthesis in type restriction"
|
|
.into(),
|
|
});
|
|
}
|
|
};
|
|
|
|
let mut arguments = vec![];
|
|
|
|
while let Ok(t) = self.parse_base_type() {
|
|
arguments.push(t);
|
|
}
|
|
|
|
let restriction = TypeRestriction {
|
|
constructor,
|
|
arguments,
|
|
};
|
|
|
|
let _ = self.require_token(Token::Comma, "");
|
|
|
|
Ok(Some(restriction))
|
|
}
|
|
|
|
/// Parse a definition.
|
|
///
|
|
/// A definition can include a structure definition, the definition of an enumeration,
|
|
/// the declaration of some sort of operator, or a value definition. (This statement
|
|
/// assumes that you consider a function a value, which is reasonable.)
|
|
///
|
|
/// If this returns an error, you should not presume that you can recover from it.
|
|
fn parse_def(&mut self) -> Result<Def, ParserError> {
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for definition body"))?;
|
|
|
|
match next.token {
|
|
Token::ValueName(ref x) if x == "structure" => {
|
|
self.save(next);
|
|
Ok(Def::Structure(self.parse_structure()?))
|
|
}
|
|
|
|
Token::ValueName(ref x) if x == "enumeration" => {
|
|
self.save(next);
|
|
Ok(Def::Enumeration(self.parse_enumeration()?))
|
|
}
|
|
|
|
Token::ValueName(ref x)
|
|
if x == "operator" || x == "prefix" || x == "infix" || x == "postfix" =>
|
|
{
|
|
self.save(next);
|
|
Ok(Def::Operator(self.parse_operator()?))
|
|
}
|
|
|
|
Token::ValueName(_) => {
|
|
self.save(next);
|
|
self.parse_function_or_value()
|
|
}
|
|
|
|
_ => Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "'structure', 'enumeration', 'operator', or a value identifier".into(),
|
|
}),
|
|
}
|
|
}
|
|
|
|
/// Parse a structure definition.
|
|
///
|
|
/// Structure definitions should start with the keyword "structure". If they
|
|
/// don't, this will return, but it will do so in a way that is recoverable.
|
|
/// Otherwise, we'll start eating tokens and who knows what state we'll end
|
|
/// in.
|
|
pub fn parse_structure(&mut self) -> Result<StructureDef, ParserError> {
|
|
let start_location = self.require_keyword("structure")?;
|
|
|
|
let structure_name = self.parse_type_name("structure definition")?;
|
|
self.require_token(Token::OpenBrace, "after a structure name")?;
|
|
|
|
let mut fields = vec![];
|
|
while let Some(field_definition) = self.parse_field_definition()? {
|
|
fields.push(field_definition);
|
|
}
|
|
|
|
let brace =
|
|
self.require_token(Token::CloseBrace, "at the end of a structure definition")?;
|
|
|
|
let location = start_location.extend_to(&brace);
|
|
|
|
Ok(StructureDef {
|
|
name: structure_name,
|
|
location,
|
|
fields,
|
|
})
|
|
}
|
|
|
|
/// Parse a name and field value for a field inside a structure constructor.
|
|
///
|
|
/// In this case, what we mean is the full "foo: bar" syntax that goes inside a structure
|
|
/// expression to declare a value.
|
|
pub fn parse_field_value(&mut self) -> Result<Option<FieldValue>, ParserError> {
|
|
let Ok(field) = self.parse_name("structure value") else {
|
|
return Ok(None);
|
|
};
|
|
self.require_token(Token::Colon, "after a field name")?;
|
|
let value = self.parse_expression()?;
|
|
|
|
if let Some(end_token) = self.next()?
|
|
&& !matches!(end_token.token, Token::Comma)
|
|
{
|
|
self.save(end_token);
|
|
}
|
|
|
|
Ok(Some(FieldValue { field, value }))
|
|
}
|
|
|
|
/// Parse a name and field definition for a field inside a structure definition.
|
|
///
|
|
/// In this case, what we mean is the full "foo: Bar" syntax that goes inside a
|
|
/// structure type definition. Note, though, that we allow the ": Bar" to be
|
|
/// elided in the case that the user wants to try to infer the type. In addition,
|
|
/// recall that structure types can declare their individual fields public or
|
|
/// not, so that information gets parsed as well.
|
|
pub fn parse_field_definition(&mut self) -> Result<Option<StructureField>, ParserError> {
|
|
let (export, start_location) = self.parse_export_class()?;
|
|
let Ok(name) = self.parse_name("field definition") else {
|
|
return Ok(None);
|
|
};
|
|
|
|
let maybe_colon = self.next()?.ok_or_else(|| {
|
|
self.bad_eof("looking for colon, comma, or close brace after field name")
|
|
})?;
|
|
|
|
let field_type = match maybe_colon.token {
|
|
Token::Comma | Token::CloseBrace => {
|
|
self.save(maybe_colon);
|
|
None
|
|
}
|
|
|
|
Token::Colon => Some(self.parse_type()?),
|
|
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: maybe_colon.span,
|
|
token: maybe_colon.token,
|
|
expected: "colon, comma, or close brace after field name".into(),
|
|
});
|
|
}
|
|
};
|
|
|
|
let end_token = self.next()?.ok_or_else(|| {
|
|
self.bad_eof("looking for comma or close brace after field definition")
|
|
})?;
|
|
|
|
let maybe_end_location = match end_token.token {
|
|
Token::Comma => Some(self.to_location(end_token.span)),
|
|
Token::CloseBrace => {
|
|
self.save(end_token);
|
|
None
|
|
}
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: end_token.span,
|
|
token: end_token.token,
|
|
expected: "looking for comma or close brace after field definition".into(),
|
|
});
|
|
}
|
|
};
|
|
|
|
let end_location = maybe_end_location
|
|
.or_else(|| field_type.as_ref().map(|x| x.location()))
|
|
.unwrap_or_else(|| name.location().unwrap().clone());
|
|
let location = start_location.extend_to(&end_location);
|
|
|
|
Ok(Some(StructureField {
|
|
location,
|
|
export,
|
|
name,
|
|
field_type,
|
|
}))
|
|
}
|
|
|
|
/// Parse an enumeration declaration from the input stream.
|
|
///
|
|
/// As with structures, this will cleanly abort if the first token is wrong,
|
|
/// but if it makes it past that token, all bets are off.
|
|
pub fn parse_enumeration(&mut self) -> Result<EnumerationDef, ParserError> {
|
|
let start_location = self.require_keyword("enumeration")?;
|
|
let enumeration_name = self.parse_type_name("enumeration definition")?;
|
|
|
|
self.require_token(Token::OpenBrace, "after enumeration name")?;
|
|
|
|
let mut variants = vec![];
|
|
while let Some(variant_definition) = self.parse_enum_variant()? {
|
|
variants.push(variant_definition);
|
|
}
|
|
|
|
let brace = self.require_token(Token::CloseBrace, "after enumeration options")?;
|
|
|
|
let location = start_location.extend_to(&brace);
|
|
|
|
Ok(EnumerationDef {
|
|
name: enumeration_name,
|
|
location,
|
|
variants,
|
|
})
|
|
}
|
|
|
|
/// Parse a variant of an enumeration in the enumeration definition.
|
|
///
|
|
/// At this point in bang's lifecycle, enumerations can have zero or one arguments,
|
|
/// but no more, which simplified parsing a trace.
|
|
pub fn parse_enum_variant(&mut self) -> Result<Option<EnumerationVariant>, ParserError> {
|
|
let Ok(name) = self.parse_type_name("variant definition") else {
|
|
return Ok(None);
|
|
};
|
|
let start_location = name.location().unwrap().clone();
|
|
|
|
let maybe_paren = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("trying to understand enumeration variant"))?;
|
|
let (argument, arg_location) = if matches!(maybe_paren.token, Token::OpenParen) {
|
|
let t = self.parse_type()?;
|
|
self.require_token(Token::CloseParen, "variant's type argument")?;
|
|
let location = t.location();
|
|
(Some(t), location)
|
|
} else {
|
|
self.save(maybe_paren);
|
|
(None, start_location.clone())
|
|
};
|
|
|
|
let ender = self.next()?.ok_or_else(|| {
|
|
self.bad_eof("looking for comma or close brace after enumeration variant")
|
|
})?;
|
|
let end_location = match ender.token {
|
|
Token::Comma => self.to_location(ender.span),
|
|
Token::CloseBrace => {
|
|
self.save(ender);
|
|
arg_location
|
|
}
|
|
_ => {
|
|
self.save(ender.clone());
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: ender.span,
|
|
token: ender.token,
|
|
expected: "comma or close brace after enumeration variant".into(),
|
|
});
|
|
}
|
|
};
|
|
|
|
let location = start_location.extend_to(&end_location);
|
|
|
|
Ok(Some(EnumerationVariant {
|
|
name,
|
|
location,
|
|
argument,
|
|
}))
|
|
}
|
|
|
|
/// Parse an operator declaration.
|
|
///
|
|
/// Operator declarations are the only thing where we immediately modify the state
|
|
/// of the parser, allowing the operator to be used immediately after it is declared.
|
|
/// Note that by "declare", we mean that the operator is given a variable that it maps
|
|
/// to; that variable can be declared further on in the file or even in another module,
|
|
/// as we won't try to resolve it until later.
|
|
///
|
|
/// Like most definitions, we'll abort cleanly if the first token isn't "operator",
|
|
/// "infix", "postfix", or "prefix" keywords, but all bets are off after that.
|
|
pub fn parse_operator(&mut self) -> Result<OperatorDef, ParserError> {
|
|
let (start, operator_type, associativity) = {
|
|
let mut optype = OperatorType::Infix;
|
|
let mut start = None;
|
|
let mut assoc = Associativity::None;
|
|
|
|
if let Ok(loc) = self.require_keyword("prefix") {
|
|
optype = OperatorType::Prefix;
|
|
start = Some(loc);
|
|
} else if let Ok(loc) = self.require_keyword("postfix") {
|
|
optype = OperatorType::Postfix;
|
|
start = Some(loc);
|
|
} else if let Ok(loc) = self.require_keyword("infix") {
|
|
start = Some(loc);
|
|
|
|
if self.require_keyword("right").is_ok() {
|
|
assoc = Associativity::Right;
|
|
} else if self.require_keyword("left").is_ok() {
|
|
assoc = Associativity::Left;
|
|
}
|
|
}
|
|
|
|
let oploc = self.require_keyword("operator")?;
|
|
(start.unwrap_or(oploc), optype, assoc)
|
|
};
|
|
let operator_name = self.parse_operator_name("operator definition")?;
|
|
|
|
let level = if self.require_keyword("at").is_ok() {
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("precedence value in operator definition"))?;
|
|
|
|
match next.token {
|
|
Token::Integer(int_with_base) if int_with_base.value < 10 => {
|
|
int_with_base.value as u8
|
|
}
|
|
|
|
Token::Integer(ref int_with_base) => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token.clone(),
|
|
expected: format!(
|
|
"number defining operator precedence ({} is too large",
|
|
int_with_base.value
|
|
),
|
|
});
|
|
}
|
|
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "number defining operator precedence".into(),
|
|
});
|
|
}
|
|
}
|
|
} else {
|
|
5
|
|
};
|
|
|
|
let _ = self.require_token(Token::Arrow, "operator definition")?;
|
|
|
|
let function_name = self.parse_name("operator function definition")?;
|
|
let end = self.require_token(Token::Semi, "end of operator definition")?;
|
|
|
|
match operator_type {
|
|
OperatorType::Infix => {
|
|
self.add_infix_precedence(operator_name.as_printed(), associativity, level)
|
|
}
|
|
OperatorType::Prefix => self.add_prefix_precedence(operator_name.as_printed(), level),
|
|
OperatorType::Postfix => self.add_postfix_precedence(operator_name.as_printed(), level),
|
|
}
|
|
|
|
Ok(OperatorDef {
|
|
location: start.extend_to(&end),
|
|
operator_name,
|
|
function_name,
|
|
})
|
|
}
|
|
|
|
/// Parse a function or a value.
|
|
///
|
|
/// Technically speaking, functions are values, so the name can feel a little silly.
|
|
/// However, we have some nice syntax for functions that avoids the need to put lambdas
|
|
/// everywhere, and so we sort of treat them differently.
|
|
fn parse_function_or_value(&mut self) -> Result<Def, ParserError> {
|
|
let name = self.parse_name("function or value definition")?;
|
|
let start = name.location().unwrap().clone();
|
|
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("type or value for definition"))?;
|
|
|
|
match next.token {
|
|
// If we see an open parenthesis next, we're looking at a nicely-formatted
|
|
// function definition, such as:
|
|
//
|
|
// factorial(x: Int) : Int {
|
|
// match x {
|
|
// 1 => 1,
|
|
// x => x * fact(x - 1),
|
|
// }
|
|
// }
|
|
//
|
|
// Or any of many variations of that.
|
|
Token::OpenParen => {
|
|
self.save(next);
|
|
let arguments = self.parse_function_def_arguments()?;
|
|
let mut return_type = None;
|
|
|
|
if self.require_token(Token::Colon, "return type").is_ok() {
|
|
return_type = Some(self.parse_type()?);
|
|
}
|
|
|
|
let Expression::Block(end, body) = self.parse_block()? else {
|
|
panic!("parse_block returned something that wasn't a block.");
|
|
};
|
|
|
|
Ok(Def::Function(FunctionDef {
|
|
name,
|
|
location: start.extend_to(&end),
|
|
arguments,
|
|
return_type,
|
|
body,
|
|
}))
|
|
}
|
|
|
|
// If we see a colon, then someone's giving us a type for what is probably
|
|
// some form of simple constant, such as:
|
|
//
|
|
// foo : Int = 4;
|
|
//
|
|
// But honestly, there's a lot of odd possibilities of complicated things
|
|
// they could write there.
|
|
Token::Colon => {
|
|
let value_type = self.parse_type()?;
|
|
let _ = self.require_operator("=")?;
|
|
let value = self.parse_expression()?;
|
|
let end = self.require_token(Token::Semi, "at end of definition")?;
|
|
|
|
Ok(Def::Value(ValueDef {
|
|
name,
|
|
location: start.extend_to(&end),
|
|
mtype: Some(value_type),
|
|
value,
|
|
}))
|
|
}
|
|
|
|
// If we see an equal sign, we're jumping right to the value part of the
|
|
// definition, and we're doing something like this:
|
|
//
|
|
// foo = 4;
|
|
//
|
|
// Again, though, you could write all sorts of interesting things after
|
|
// that.
|
|
Token::OperatorName(eq) if eq == "=" => {
|
|
let value = self.parse_expression()?;
|
|
let end = self.require_token(Token::Semi, "at end of definition")?;
|
|
|
|
Ok(Def::Value(ValueDef {
|
|
name,
|
|
location: start.extend_to(&end),
|
|
mtype: None,
|
|
value,
|
|
}))
|
|
}
|
|
|
|
// Those should be the only cases, so if we get here, something weird
|
|
// is going on.
|
|
_ => Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "open parenthesis, colon, or equals after variable in definition".into(),
|
|
}),
|
|
}
|
|
}
|
|
|
|
/// Parse the arguments to a function declaration.
|
|
///
|
|
/// Function arguments should have types, but don't have to. This function assumes
|
|
/// that it's starting at the opening parenthesis, and will error (cleanly) if it
|
|
/// isn't.
|
|
fn parse_function_def_arguments(&mut self) -> Result<Vec<FunctionArg>, ParserError> {
|
|
let _ = self.require_token(Token::OpenParen, "start of function argument definition")?;
|
|
let mut result = vec![];
|
|
let mut just_skipped_comma = false;
|
|
|
|
loop {
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("parsing function arguments"))?;
|
|
|
|
if matches!(next.token, Token::CloseParen) {
|
|
break;
|
|
}
|
|
|
|
if matches!(next.token, Token::Comma) {
|
|
if just_skipped_comma {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "after another comma in function arguments".into(),
|
|
});
|
|
}
|
|
|
|
just_skipped_comma = true;
|
|
continue;
|
|
}
|
|
|
|
self.save(next);
|
|
just_skipped_comma = false;
|
|
let name = self.parse_name("function argument name")?;
|
|
let mut arg_type = None;
|
|
|
|
if self.require_token(Token::Colon, "").is_ok() {
|
|
arg_type = Some(self.parse_type()?);
|
|
}
|
|
|
|
result.push(FunctionArg { name, arg_type });
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
/// Parse a single expression out of the input stream.
|
|
///
|
|
/// Because expressions can start with so many possible tokens, it's very
|
|
/// likely that if you call this, the input stream will be corrupted by any
|
|
/// errors this function returns. So you should be careful to only call it
|
|
/// in situations that don't require rollback.
|
|
pub fn parse_expression(&mut self) -> Result<Expression, ParserError> {
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for an expression"))?;
|
|
|
|
self.save(next.clone());
|
|
match next.token {
|
|
Token::ValueName(x) if x == "match" => {
|
|
Ok(Expression::Match(self.parse_match_expression()?))
|
|
}
|
|
Token::ValueName(x) if x == "if" => {
|
|
Ok(Expression::Conditional(self.parse_if_expression()?))
|
|
}
|
|
_ => self.parse_arithmetic(0),
|
|
}
|
|
}
|
|
|
|
/// Parse a match expression.
|
|
///
|
|
/// This function does assume that the next token in the input stream will
|
|
/// be the "match" keyword, and will error immediately (albeit, saving the
|
|
/// stream) if it isn't. So you *can* use this if you're not sure this is
|
|
/// a match expression, and want to escape if it isn't.
|
|
fn parse_match_expression(&mut self) -> Result<MatchExpr, ParserError> {
|
|
let start = self.require_keyword("match")?;
|
|
let value = Box::new(self.parse_arithmetic(0)?);
|
|
self.require_token(Token::OpenBrace, "start of a match case list")?;
|
|
|
|
let mut cases = vec![];
|
|
while let Some(case) = self.parse_match_case()? {
|
|
cases.push(case);
|
|
}
|
|
|
|
let end = self.require_token(Token::CloseBrace, "end of a match case list")?;
|
|
Ok(MatchExpr {
|
|
location: start.extend_to(&end),
|
|
value,
|
|
cases,
|
|
})
|
|
}
|
|
|
|
/// Parse a single match case.
|
|
///
|
|
/// A match case consists of a pattern, a double-arrow, and then an expression
|
|
/// describing what to do if that pattern matches the expression. It may or may
|
|
/// not conclude with a comma.
|
|
fn parse_match_case(&mut self) -> Result<Option<MatchCase>, ParserError> {
|
|
// skip over anything we can just skip
|
|
loop {
|
|
let peeked = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for match case"))?;
|
|
|
|
if matches!(peeked.token, Token::Comma) {
|
|
continue;
|
|
}
|
|
|
|
let stop = matches!(peeked.token, Token::CloseBrace);
|
|
|
|
self.save(peeked);
|
|
if stop {
|
|
return Ok(None);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
let pattern = self.parse_pattern()?;
|
|
self.require_token(Token::Arrow, "after pattern in match clause")?;
|
|
|
|
let consequent = self.parse_expression()?;
|
|
|
|
Ok(Some(MatchCase {
|
|
pattern,
|
|
consequent,
|
|
}))
|
|
}
|
|
|
|
/// Parse a pattern from the input stream.
///
/// Four forms are recognised: a constant, a variable (a value name), a
/// structure pattern (`Type { ... }`), and an enumeration pattern
/// (`Type::Variant`, optionally followed by one parenthesised sub-pattern).
///
/// Patterns are a recursive, complex structure without a clear opening token.
/// So ... you better be sure that you want a pattern when you call this,
/// because you're almost certainly not going to be able to recover and try
/// something else if this breaks.
pub fn parse_pattern(&mut self) -> Result<Pattern, ParserError> {
    // Try a constant first; any failure is taken to mean "not a constant".
    // NOTE(review): this assumes parse_constant leaves the stream intact
    // when it fails; confirm against its implementation.
    if let Ok(constant) = self.parse_constant() {
        return Ok(Pattern::Constant(constant));
    }

    let next = self
        .next()?
        .ok_or_else(|| self.bad_eof("looking for a pattern to match"))?;

    match next.token {
        // A bare value name is a variable pattern.
        Token::ValueName(x) => {
            let name = Name::new(self.to_location(next.span), x);
            Ok(Pattern::Variable(name))
        }

        // A type name starts a structure or an enumeration pattern; the
        // token that follows says which.
        Token::TypeName(x) => {
            let type_name = Name::new(self.to_location(next.span.clone()), x);
            let start = self.to_location(next.span);

            let next = self
                .next()?
                .ok_or_else(|| self.bad_eof("looking for a pattern to match"))?;
            match next.token {
                // `Type { field patterns }`
                Token::OpenBrace => {
                    let mut fields = vec![];

                    while let Some(field_pattern) = self.parse_field_pattern()? {
                        fields.push(field_pattern)
                    }

                    let end =
                        self.require_token(Token::CloseBrace, "after structure pattern")?;
                    let structure_pattern = StructurePattern {
                        location: start.extend_to(&end),
                        type_name,
                        fields,
                    };

                    Ok(Pattern::Structure(structure_pattern))
                }

                // `Type::Variant` or `Type::Variant(sub-pattern)`
                Token::DoubleColon => {
                    let variant_name =
                        self.parse_type_name("enumeration pattern variant name")?;

                    // Ends at the variant name unless an argument follows,
                    // in which case it ends at the closing parenthesis.
                    let mut final_location = variant_name.location().unwrap().clone();

                    // Running out of input right after the variant name is
                    // accepted here: the pattern simply has no argument.
                    let argument = if let Some(maybe_paren) = self.next()? {
                        if matches!(maybe_paren.token, Token::OpenParen) {
                            let sub_pattern = self.parse_pattern()?;
                            final_location = self.require_token(
                                Token::CloseParen,
                                "after enumeration pattern argument",
                            )?;

                            Some(Box::new(sub_pattern))
                        } else {
                            // Not an argument; leave the token for the caller.
                            self.save(maybe_paren);
                            None
                        }
                    } else {
                        None
                    };

                    let location = start.extend_to(&final_location);

                    let pattern = EnumerationPattern {
                        location,
                        type_name,
                        variant_name,
                        argument,
                    };

                    Ok(Pattern::EnumerationValue(pattern))
                }

                // The offending token is not pushed back.
                _ => Err(ParserError::UnexpectedToken {
                    file: self.file.clone(),
                    span: next.span,
                    token: next.token,
                    expected: "An '::' or '{' after a type name in a pattern".into(),
                }),
            }
        }

        // The offending token is not pushed back.
        _ => Err(ParserError::UnexpectedToken {
            file: self.file.clone(),
            span: next.span,
            token: next.token,
            expected: "The start of a pattern: a variable name or type name".into(),
        }),
    }
}
|
|
|
|
/// Parse a field pattern.
|
|
///
|
|
/// For reference, a field pattern is either just the name of a field, or a name of a
|
|
/// field plus a colon and some form of subpattern. This can be used to either rename
|
|
/// a field or to only match when a field has a particular value.
|
|
///
|
|
/// Regardless, this should start with a name, and if it doesn't start with a name,
|
|
/// we'll return Ok(None) to indicate that we're done parsing field patterns. If we
|
|
/// do get a name and then reach some sort of error, though, who knows what state we'll
|
|
/// end up in.
|
|
fn parse_field_pattern(&mut self) -> Result<Option<(Name, Option<Pattern>)>, ParserError> {
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for structure pattern field name"))?;
|
|
let name = match next.token {
|
|
Token::CloseBrace => {
|
|
self.save(next);
|
|
return Ok(None);
|
|
}
|
|
|
|
Token::ValueName(s) => Name::new(self.to_location(next.span), s),
|
|
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "a field name in a structure pattern".into(),
|
|
});
|
|
}
|
|
};
|
|
|
|
let next = self.next()?.ok_or_else(|| {
|
|
self.bad_eof("looking for colon, comma, or brace after structure field name in pattern")
|
|
})?;
|
|
let sub_pattern = match next.token {
|
|
Token::Comma => None,
|
|
|
|
Token::CloseBrace => {
|
|
self.save(next);
|
|
None
|
|
}
|
|
|
|
Token::Colon => {
|
|
let subpattern = self.parse_pattern()?;
|
|
let next = self.next()?.ok_or_else(|| {
|
|
self.bad_eof("looking for comma or close brace after structure field")
|
|
})?;
|
|
|
|
match next.token {
|
|
Token::Comma => {}
|
|
Token::CloseBrace => self.save(next),
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "comma or close brace after structure field".into(),
|
|
});
|
|
}
|
|
}
|
|
|
|
Some(subpattern)
|
|
}
|
|
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "colon, comma, or brace after structure field name in pattern".into(),
|
|
});
|
|
}
|
|
};
|
|
|
|
Ok(Some((name, sub_pattern)))
|
|
}
|
|
|
|
/// Parse an if expression.
|
|
///
|
|
/// Like many of these functions, there's a nice indicator immediately available to us
|
|
/// so that we know whether or not this is an if statement. If we don't see it, we will
|
|
/// return with an error but the input stream will be clean. However, if we do see one,
|
|
/// and there's an error down the line, then there's nothing we can do.
|
|
fn parse_if_expression(&mut self) -> Result<ConditionalExpr, ParserError> {
|
|
let start = self.require_keyword("if")?;
|
|
let test = self.parse_arithmetic(0)?;
|
|
let consequent = self.parse_block()?;
|
|
let mut alternative = None;
|
|
|
|
if self.require_keyword("else").is_ok() {
|
|
alternative = Some(Box::new(self.parse_block()?));
|
|
}
|
|
|
|
let end = alternative
|
|
.as_ref()
|
|
.map(|x| x.location())
|
|
.unwrap_or_else(|| consequent.location());
|
|
|
|
Ok(ConditionalExpr {
|
|
location: start.extend_to(&end),
|
|
test: Box::new(test),
|
|
consequent: Box::new(consequent),
|
|
alternative,
|
|
})
|
|
}
|
|
|
|
/// Parse a block.
|
|
///
|
|
/// A block starts with an open brace -- so if we don't see one, we'll exit cleanly --
|
|
/// but gets real complicated after that. So, once again, be thoughtful about how this
|
|
/// is called.
|
|
pub fn parse_block(&mut self) -> Result<Expression, ParserError> {
|
|
let start = self.require_token(Token::OpenBrace, "start of a block")?;
|
|
|
|
let mut statements = vec![];
|
|
let mut ended_with_expr = false;
|
|
|
|
while let Some((stmt, terminal)) = self.parse_statement()? {
|
|
statements.push(stmt);
|
|
if terminal {
|
|
ended_with_expr = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
let end = self.require_token(Token::CloseBrace, "end of a block")?;
|
|
|
|
if !ended_with_expr {
|
|
let void_name = Name::new(end.clone(), "%prim%void");
|
|
let void_ref = Expression::Reference(end.clone(), void_name);
|
|
let void_call = Expression::Call(Box::new(void_ref), CallKind::Normal, vec![]);
|
|
statements.push(Statement::Expression(void_call));
|
|
}
|
|
|
|
Ok(Expression::Block(start.extend_to(&end), statements))
|
|
}
|
|
|
|
/// Parse a statement, or return None if we're now done with parsing a block.
|
|
///
|
|
/// We know we're done parsing a block when we hit a close brace, basically. We
|
|
/// should ignore excess semicolons cleanly, and that sort of thing. Because
|
|
/// statements vary pretty widely, you should not assume that the input is clean
|
|
/// on any sort of error.
|
|
pub fn parse_statement(&mut self) -> Result<Option<(Statement, bool)>, ParserError> {
|
|
loop {
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for a statement or close brace"))?;
|
|
|
|
match next.token {
|
|
Token::CloseBrace => {
|
|
self.save(next);
|
|
return Ok(None);
|
|
}
|
|
|
|
Token::Semi => continue,
|
|
|
|
Token::ValueName(ref l) if l == "let" => {
|
|
self.save(next);
|
|
return Ok(Some((Statement::Binding(self.parse_let()?), false)));
|
|
}
|
|
|
|
_ => {
|
|
self.save(next);
|
|
let expr = Statement::Expression(self.parse_expression()?);
|
|
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for semicolon or close brace"))?;
|
|
|
|
if matches!(next.token, Token::Semi) {
|
|
return Ok(Some((expr, false)));
|
|
} else {
|
|
self.save(next);
|
|
return Ok(Some((expr, true)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parse a let statement.
|
|
///
|
|
/// This will assume that the first token in the stream is a "let", and be upset if
|
|
/// it is not. However, it will be upset cleanly, which is nice.
|
|
pub fn parse_let(&mut self) -> Result<BindingStmt, ParserError> {
|
|
let start = self.require_keyword("let")?;
|
|
let mutable = self.require_keyword("mut").is_ok();
|
|
let variable = self.parse_name("let binding")?;
|
|
let _ = self.require_operator("=")?;
|
|
let value = self.parse_expression()?;
|
|
let end = self.require_token(Token::Semi, "let statement")?;
|
|
|
|
Ok(BindingStmt {
|
|
location: start.extend_to(&end),
|
|
mutable,
|
|
variable,
|
|
value,
|
|
})
|
|
}
|
|
|
|
/// Parse an arithmetic expression, obeying the laws of precedence.
|
|
///
|
|
/// This is an implementation of Pratt Parsing, although I've probably done it in
|
|
/// a much more awkward way than necessary. I was heavily inspired and/or stole
|
|
/// code directly from [this
|
|
/// article](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html),
|
|
/// which was instrumental in its design. All errors mine.
|
|
///
|
|
/// Note that because arithmetic expressions can start with so many tokens, you
|
|
/// should only call this function if you are absolutely sure that there's an
|
|
/// expression waiting for you, and it would be an error if there wasn't.
|
|
pub fn parse_arithmetic(&mut self, level: u8) -> Result<Expression, ParserError> {
|
|
// start by checking for prefix operators.
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for arithmetic expression"))?;
|
|
|
|
let mut lhs = if let Token::OperatorName(ref n) = next.token {
|
|
if let Some(pre_prec) = self.prefix_precedence_table.get(n) {
|
|
if *pre_prec < level {
|
|
self.save(next.clone());
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "a base expression of a tighter-binding prefix operator".into(),
|
|
});
|
|
}
|
|
|
|
let rhs = self.parse_arithmetic(*pre_prec)?;
|
|
let location = self.to_location(next.span);
|
|
let opname = Name::new(location.clone(), n);
|
|
let op_expr = Expression::Reference(location, opname);
|
|
|
|
Expression::Call(Box::new(op_expr), CallKind::Prefix, vec![rhs])
|
|
} else {
|
|
self.save(next);
|
|
self.parse_base_expression()?
|
|
}
|
|
} else {
|
|
self.save(next);
|
|
self.parse_base_expression()?
|
|
};
|
|
|
|
loop {
|
|
let Some(next) = self.next()? else {
|
|
return Ok(lhs);
|
|
};
|
|
|
|
match next.token {
|
|
Token::OpenParen => {
|
|
self.save(next);
|
|
let args = self.parse_call_arguments()?;
|
|
lhs = Expression::Call(Box::new(lhs), CallKind::Normal, args);
|
|
}
|
|
|
|
Token::OperatorName(ref n) => {
|
|
if let Some(postprec) = self.postfix_precedence_table.get(n) {
|
|
if *postprec < level {
|
|
self.save(next);
|
|
break;
|
|
}
|
|
|
|
let location = self.to_location(next.span);
|
|
let opname = Name::new(location.clone(), n);
|
|
let op_expr = Expression::Reference(location, opname);
|
|
|
|
lhs = Expression::Call(Box::new(op_expr), CallKind::Postfix, vec![lhs]);
|
|
continue;
|
|
}
|
|
|
|
let (left_pr, right_pr) = self.get_precedence(n);
|
|
|
|
if left_pr < level {
|
|
self.save(next);
|
|
break;
|
|
}
|
|
|
|
let rhs = self.parse_arithmetic(right_pr)?;
|
|
let location = self.to_location(next.span);
|
|
let name = Name::new(location.clone(), n);
|
|
let opref = Box::new(Expression::Reference(location, name));
|
|
let args = vec![lhs, rhs];
|
|
|
|
lhs = Expression::Call(opref, CallKind::Infix, args);
|
|
}
|
|
|
|
_ => {
|
|
self.save(next);
|
|
return Ok(lhs);
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(lhs)
|
|
}
|
|
|
|
/// Parse the arguments to a function call.
|
|
///
|
|
/// We assume that, at this point, you have eaten the thing you're calling out of
|
|
/// the input stream, and are on the parenthesis that defines the arguments to the
|
|
/// function. If you're not there, then this will error, but in a way that you can
|
|
/// recover from.
|
|
fn parse_call_arguments(&mut self) -> Result<Vec<Expression>, ParserError> {
|
|
let _ = self.require_token(Token::OpenParen, "for function arguments")?;
|
|
let mut args = vec![];
|
|
|
|
loop {
|
|
let next = self.next()?.ok_or_else(|| {
|
|
self.bad_eof("looking for an expression or close paren in function arguments")
|
|
})?;
|
|
|
|
if matches!(next.token, Token::CloseParen) {
|
|
break;
|
|
}
|
|
|
|
self.save(next);
|
|
let argument = self.parse_arithmetic(0)?;
|
|
args.push(argument);
|
|
|
|
let next = self.next()?.ok_or_else(|| {
|
|
self.bad_eof("looking for comma or close paren in function arguments")
|
|
})?;
|
|
match next.token {
|
|
Token::Comma => continue,
|
|
Token::CloseParen => break,
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "comma or close paren in function arguments".into(),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(args)
|
|
}
|
|
|
|
/// Parse a base expression.
|
|
///
|
|
/// A base expression can be any number of things:
|
|
/// * A constant, of any form
|
|
/// * A variable name
|
|
/// * A constructor, like a structure constructor or an enumeration value
|
|
/// * A parenthesized expression of some other form
|
|
/// * A block
|
|
///
|
|
/// Most of these can be identified by the first token in the input
|
|
/// stream. If we don't recognize a valid first token in the input
|
|
/// stream, we return an error and restore the original input stream
|
|
/// state. However, if the first token leads us to a valid next state,
|
|
/// we may not be able to recover the original stream state on an error.
|
|
///
|
|
/// As a result, this should only be called when you're very confident
|
|
/// that the next thing is going to be an expression.
|
|
pub fn parse_base_expression(&mut self) -> Result<Expression, ParserError> {
|
|
if let Ok(v) = self.parse_constant() {
|
|
return Ok(Expression::Value(v));
|
|
}
|
|
|
|
let next = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for an expression"))?;
|
|
|
|
match next.token {
|
|
Token::OpenBrace => {
|
|
self.save(next);
|
|
self.parse_block()
|
|
}
|
|
|
|
Token::OpenParen => {
|
|
let inner = self.parse_expression()?;
|
|
self.require_token(Token::CloseParen, "the end of a parenthesized expression")?;
|
|
Ok(inner)
|
|
}
|
|
|
|
Token::TypeName(n) | Token::PrimitiveTypeName(n) => {
|
|
let type_name = Name::new(self.to_location(next.span.clone()), n);
|
|
let Some(after_type_name) = self.next()? else {
|
|
return Ok(Expression::Reference(
|
|
type_name.location().unwrap().clone(),
|
|
type_name,
|
|
));
|
|
};
|
|
|
|
match after_type_name.token {
|
|
Token::OpenBrace => {
|
|
let mut fields = vec![];
|
|
|
|
while let Some(field) = self.parse_field_value()? {
|
|
fields.push(field);
|
|
}
|
|
|
|
let brace =
|
|
self.require_token(Token::CloseBrace, "end of structure value")?;
|
|
|
|
let sv = StructureExpr {
|
|
location: self.to_location(next.span).extend_to(&brace),
|
|
type_name,
|
|
fields,
|
|
};
|
|
|
|
Ok(Expression::Structure(sv))
|
|
}
|
|
|
|
Token::DoubleColon => {
|
|
let vname = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for enumeration value name"))?;
|
|
|
|
let variant_name = match vname.token {
|
|
Token::TypeName(s) => {
|
|
let loc = self.to_location(vname.span.clone());
|
|
Name::new(loc, s)
|
|
}
|
|
|
|
_ => {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: vname.span,
|
|
token: vname.token,
|
|
expected: "enumeration value name".into(),
|
|
});
|
|
}
|
|
};
|
|
|
|
let (argument, end_loc) = if let Some(maybe_paren) = self.next()? {
|
|
if matches!(maybe_paren.token, Token::OpenParen) {
|
|
let expr = self.parse_expression()?;
|
|
let closer = self
|
|
.require_token(Token::CloseParen, "after variant argument")?;
|
|
|
|
(Some(Box::new(expr)), closer)
|
|
} else {
|
|
self.save(maybe_paren);
|
|
(None, self.to_location(vname.span))
|
|
}
|
|
} else {
|
|
(None, self.to_location(vname.span))
|
|
};
|
|
|
|
let ev = EnumerationExpr {
|
|
location: self.to_location(next.span).extend_to(&end_loc),
|
|
type_name,
|
|
variant_name,
|
|
argument,
|
|
};
|
|
|
|
Ok(Expression::Enumeration(ev))
|
|
}
|
|
|
|
_ => {
|
|
self.save(after_type_name);
|
|
Ok(Expression::Reference(
|
|
type_name.location().unwrap().clone(),
|
|
type_name,
|
|
))
|
|
}
|
|
}
|
|
}
|
|
|
|
Token::ValueName(n) | Token::PrimitiveValueName(n) => {
|
|
let location = self.to_location(next.span);
|
|
let name = Name::new(location.clone(), n);
|
|
Ok(Expression::Reference(location, name))
|
|
}
|
|
|
|
_ => {
|
|
self.save(next.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: next.span,
|
|
token: next.token,
|
|
expected: "some base expression or an open brace".into(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parse a type from the input stream.
|
|
///
|
|
/// Obviously, there are a lot of ways for things to not be a valid
|
|
/// function type. As it can, this will try to leave things in the
|
|
/// original state on an error, but that won't always be possible. So
|
|
/// it's probably best to only try to call this when you're sure there
|
|
/// should be a type sitting there.
|
|
pub fn parse_type(&mut self) -> Result<Type, ParserError> {
|
|
let mut args = Vec::new();
|
|
|
|
while let Ok(t) = self.parse_type_application() {
|
|
args.push(t);
|
|
}
|
|
|
|
let Some(maybe_arrow) = self.next()? else {
|
|
match args.pop() {
|
|
None => {
|
|
return Err(ParserError::UnacceptableEof {
|
|
file: self.file.clone(),
|
|
place: "parsing function type or type".into(),
|
|
});
|
|
}
|
|
|
|
Some(t) if args.is_empty() => return Ok(t),
|
|
|
|
Some(_) => {
|
|
return Err(ParserError::UnacceptableEof {
|
|
file: self.file.clone(),
|
|
place: "looking for '->' in function type".into(),
|
|
});
|
|
}
|
|
}
|
|
};
|
|
|
|
if maybe_arrow.token == Token::Arrow {
|
|
let right = self.parse_type()?;
|
|
Ok(Type::Function(args, Box::new(right)))
|
|
} else if args.len() == 1 {
|
|
self.save(maybe_arrow);
|
|
Ok(args.pop().expect("length = 1 works"))
|
|
} else {
|
|
self.save(maybe_arrow.clone());
|
|
let LocatedToken { token, span } = maybe_arrow;
|
|
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span,
|
|
token,
|
|
expected: "'->' in function type".into(),
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Parse a type application.
|
|
///
|
|
/// Type applications must start with a type name (a capitalized variable
|
|
/// name). If we don't find one, we immediately error out. However if we
|
|
/// do find one, we will then eat as many base types as we can until we
|
|
/// run into an error.
|
|
///
|
|
/// If we don't find a type name immediately, we will return an error but
|
|
/// leave the parse stream unchanged. If we parse a bunch of base types
|
|
/// correctly, the stream will be left at the start of the first non-base-type
|
|
/// token. However, this function can leave things in a weird state if there
|
|
/// is an open parenthesis that tries to enclose something that's not a type.
|
|
fn parse_type_application(&mut self) -> Result<Type, ParserError> {
|
|
let LocatedToken { token, span } =
|
|
self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?;
|
|
|
|
let constructor = match token {
|
|
Token::TypeName(x) => {
|
|
let name = Name::new(self.to_location(span.clone()), x);
|
|
Type::Constructor(self.to_location(span), name)
|
|
}
|
|
Token::PrimitiveTypeName(x) => {
|
|
let name = Name::new(self.to_location(span.clone()), x);
|
|
Type::Primitive(self.to_location(span), name)
|
|
}
|
|
_ => {
|
|
self.save(LocatedToken { token, span });
|
|
return self.parse_base_type();
|
|
}
|
|
};
|
|
|
|
let mut args = vec![];
|
|
|
|
while let Ok(next_arg) = self.parse_base_type() {
|
|
args.push(next_arg);
|
|
}
|
|
|
|
Ok(Type::Application(Box::new(constructor), args))
|
|
}
|
|
|
|
/// Parse a base type from the input stream.
|
|
///
|
|
/// A "base type" is a type variable, a primitive type name, a type name,
|
|
/// or a parenthesized version of some other type. This function will return
|
|
/// an error if it can't find one of these things, and will *attempt* to
|
|
/// return the stream unmodified in the event of an error. However, if it
|
|
/// sees a parenthesis and tries to parse a nested, complex type, it may
|
|
/// not be possible to recover the state precisely.
|
|
fn parse_base_type(&mut self) -> Result<Type, ParserError> {
|
|
let LocatedToken { token, span } =
|
|
self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?;
|
|
|
|
match token {
|
|
Token::TypeName(x) => {
|
|
let name = Name::new(self.to_location(span.clone()), x);
|
|
Ok(Type::Constructor(self.to_location(span), name))
|
|
}
|
|
Token::PrimitiveTypeName(x) => {
|
|
let name = Name::new(self.to_location(span.clone()), x);
|
|
Ok(Type::Primitive(self.to_location(span), name))
|
|
}
|
|
Token::ValueName(x) => {
|
|
let name = Name::new(self.to_location(span.clone()), x);
|
|
Ok(Type::Variable(self.to_location(span), name))
|
|
}
|
|
Token::OpenParen => {
|
|
let t = self.parse_type()?;
|
|
let closer = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("close paren in type"))?;
|
|
|
|
if !matches!(closer.token, Token::CloseParen) {
|
|
return Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: closer.span,
|
|
token: closer.token,
|
|
expected: "close parenthesis to finish a type".into(),
|
|
});
|
|
}
|
|
|
|
Ok(t)
|
|
}
|
|
token => {
|
|
self.save(LocatedToken {
|
|
token: token.clone(),
|
|
span: span.clone(),
|
|
});
|
|
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span,
|
|
token,
|
|
expected: "type constructor, type variable, or primitive type".into(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Try to parse a constant value from the input stream.
|
|
///
|
|
/// If we don't find a name, the stream should be returned in the same state
|
|
/// at which it entered this function.
|
|
pub(crate) fn parse_constant(&mut self) -> Result<ConstantValue, ParserError> {
|
|
let maybe_constant = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof("looking for a constant"))?;
|
|
|
|
match maybe_constant.token {
|
|
Token::Integer(iwb) => Ok(ConstantValue::Integer(
|
|
self.to_location(maybe_constant.span),
|
|
iwb,
|
|
)),
|
|
Token::Character(c) => Ok(ConstantValue::Character(
|
|
self.to_location(maybe_constant.span),
|
|
c,
|
|
)),
|
|
Token::String(s) => Ok(ConstantValue::String(
|
|
self.to_location(maybe_constant.span),
|
|
s,
|
|
)),
|
|
_ => {
|
|
self.save(maybe_constant.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: maybe_constant.span,
|
|
token: maybe_constant.token,
|
|
expected: "constant value".into(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Try to parse a name from the input stream.
|
|
///
|
|
/// If we don't find a name, the stream should be returned in the same state
|
|
/// at which it entered this function.
|
|
fn parse_name(&mut self, place: &'static str) -> Result<Name, ParserError> {
|
|
let maybe_name = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof(format!("looking for a name in {place}")))?;
|
|
|
|
if let Token::ValueName(x) = maybe_name.token {
|
|
Ok(Name::new(self.to_location(maybe_name.span), x))
|
|
} else {
|
|
self.save(maybe_name.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: maybe_name.span,
|
|
token: maybe_name.token,
|
|
expected: format!("looking for a name in {place}"),
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Try to parse a type name from the input stream.
|
|
///
|
|
/// If we don't find a name, the stream should be returned in the same state
|
|
/// at which it entered this function.
|
|
fn parse_type_name(&mut self, place: &'static str) -> Result<Name, ParserError> {
|
|
let maybe_name = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof(format!("looking for a type name in {place}")))?;
|
|
|
|
if let Token::TypeName(x) = maybe_name.token {
|
|
Ok(Name::new(self.to_location(maybe_name.span), x))
|
|
} else {
|
|
self.save(maybe_name.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: maybe_name.span,
|
|
token: maybe_name.token,
|
|
expected: format!("looking for a type name in {place}"),
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Try to parse an operator from the input stream.
|
|
///
|
|
/// If we don't find a name, the stream should be returned in the same state
|
|
/// at which it entered this function.
|
|
fn parse_operator_name(&mut self, place: &'static str) -> Result<Name, ParserError> {
|
|
let maybe_name = self
|
|
.next()?
|
|
.ok_or_else(|| self.bad_eof(format!("looking for a type name in {place}")))?;
|
|
|
|
if let Token::OperatorName(x) = maybe_name.token {
|
|
Ok(Name::new(self.to_location(maybe_name.span), x))
|
|
} else {
|
|
self.save(maybe_name.clone());
|
|
Err(ParserError::UnexpectedToken {
|
|
file: self.file.clone(),
|
|
span: maybe_name.span,
|
|
token: maybe_name.token,
|
|
expected: format!("looking for an operator name in {place}"),
|
|
})
|
|
}
|
|
}
|
|
}
|