✍️ Switch to a handwritten lexer and parser. #1

Open
acw wants to merge 33 commits from handwritten-lexer into master
6 changed files with 1449 additions and 373 deletions
Showing only changes of commit 4362d82034 - Show all commits

271
Cargo.lock generated
View File

@@ -36,15 +36,15 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.1" version = "2.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.1" version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]] [[package]]
name = "codespan" name = "codespan"
@@ -69,12 +69,12 @@ dependencies = [
[[package]] [[package]]
name = "errno" name = "errno"
version = "0.3.13" version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [ dependencies = [
"libc", "libc",
"windows-sys 0.60.2", "windows-sys",
] ]
[[package]] [[package]]
@@ -109,15 +109,15 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.175" version = "0.2.176"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.9.4" version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
[[package]] [[package]]
name = "num-traits" name = "num-traits"
@@ -145,18 +145,18 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.97" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]] [[package]]
name = "proptest" name = "proptest"
version = "1.7.0" version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" checksum = "2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce"
dependencies = [ dependencies = [
"bit-set", "bit-set",
"bit-vec", "bit-vec",
@@ -244,21 +244,21 @@ dependencies = [
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.5" version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "1.0.8" version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
"windows-sys 0.60.2", "windows-sys",
] ]
[[package]] [[package]]
@@ -275,18 +275,28 @@ dependencies = [
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.219" version = "1.0.227"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.227"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.219" version = "1.0.227"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -295,9 +305,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.104" version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -306,15 +316,15 @@ dependencies = [
[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.20.0" version = "3.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
dependencies = [ dependencies = [
"fastrand", "fastrand",
"getrandom", "getrandom",
"once_cell", "once_cell",
"rustix", "rustix",
"windows-sys 0.59.0", "windows-sys",
] ]
[[package]] [[package]]
@@ -328,18 +338,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "2.0.14" version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "2.0.14" version = "2.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -354,15 +364,15 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.18" version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.1.14" version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c"
[[package]] [[package]]
name = "wait-timeout" name = "wait-timeout"
@@ -375,191 +385,66 @@ dependencies = [
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.14.2+wasi-0.2.4" version = "0.14.7+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
dependencies = [ dependencies = [
"wit-bindgen-rt", "wasip2",
]
[[package]]
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
] ]
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.9" version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [ dependencies = [
"windows-sys 0.59.0", "windows-sys",
] ]
[[package]]
name = "windows-link"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.59.0" version = "0.61.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f"
dependencies = [ dependencies = [
"windows-targets 0.52.6", "windows-link",
] ]
[[package]] [[package]]
name = "windows-sys" name = "wit-bindgen"
version = "0.60.2" version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
dependencies = [
"windows-targets 0.53.2",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm 0.52.6",
"windows_aarch64_msvc 0.52.6",
"windows_i686_gnu 0.52.6",
"windows_i686_gnullvm 0.52.6",
"windows_i686_msvc 0.52.6",
"windows_x86_64_gnu 0.52.6",
"windows_x86_64_gnullvm 0.52.6",
"windows_x86_64_msvc 0.52.6",
]
[[package]]
name = "windows-targets"
version = "0.53.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
dependencies = [
"windows_aarch64_gnullvm 0.53.0",
"windows_aarch64_msvc 0.53.0",
"windows_i686_gnu 0.53.0",
"windows_i686_gnullvm 0.53.0",
"windows_i686_msvc 0.53.0",
"windows_x86_64_gnu 0.53.0",
"windows_x86_64_gnullvm 0.53.0",
"windows_x86_64_msvc 0.53.0",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_aarch64_msvc"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnu"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_gnullvm"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_i686_msvc"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnu"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "windows_x86_64_msvc"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags",
]
[[package]] [[package]]
name = "zerocopy" name = "zerocopy"
version = "0.8.26" version = "0.8.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
dependencies = [ dependencies = [
"zerocopy-derive", "zerocopy-derive",
] ]
[[package]] [[package]]
name = "zerocopy-derive" name = "zerocopy-derive"
version = "0.8.26" version = "0.8.27"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",

View File

@@ -1,46 +1,17 @@
mod error; mod error;
mod location;
mod name;
mod parse; mod parse;
#[cfg(test)]
mod parser_tests;
pub mod tokens; pub mod tokens;
#[cfg(test)] pub use location::{Located, Location};
use crate::syntax::error::ParserError; pub use name::Name;
use crate::syntax::parse::Parser;
#[cfg(test)]
use crate::syntax::tokens::Lexer;
use codespan_reporting::diagnostic::Label;
use proptest_derive::Arbitrary; use proptest_derive::Arbitrary;
use std::cmp::{max, min};
use std::fmt::Debug; use std::fmt::Debug;
use std::ops::Range; use std::ops::Range;
#[derive(Debug)]
pub struct Location {
file_id: usize,
span: Range<usize>,
}
impl Location {
pub fn new(file_id: usize, span: Range<usize>) -> Self {
Location { file_id, span }
}
pub fn extend_to(&self, other: &Location) -> Location {
assert_eq!(self.file_id, other.file_id);
Location {
file_id: self.file_id,
span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end),
}
}
pub fn primary_label(&self) -> Label<usize> {
Label::primary(self.file_id, self.span.clone())
}
pub fn secondary_label(&self) -> Label<usize> {
Label::secondary(self.file_id, self.span.clone())
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct Module { pub struct Module {
definitions: Vec<Definition>, definitions: Vec<Definition>,
@@ -54,6 +25,12 @@ pub struct Definition {
definition: Def, definition: Def,
} }
impl Located for Definition {
fn location(&self) -> Location {
self.location.clone()
}
}
#[derive(Debug)] #[derive(Debug)]
pub enum Def { pub enum Def {
Enumeration(EnumerationDef), Enumeration(EnumerationDef),
@@ -62,28 +39,29 @@ pub enum Def {
Value(ValueDef), Value(ValueDef),
} }
impl Def { impl Located for Def {
fn location(&self) -> &Location { fn location(&self) -> Location {
match self { match self {
Def::Enumeration(def) => &def.location, Def::Enumeration(def) => def.location.clone(),
Def::Structure(def) => &def.location, Def::Structure(def) => def.location.clone(),
Def::Function(def) => &def.location, Def::Function(def) => def.location.clone(),
Def::Value(def) => &def.location, Def::Value(def) => def.location.clone(),
} }
} }
} }
#[derive(Debug)] #[derive(Debug)]
pub struct EnumerationDef { pub struct EnumerationDef {
name: String,
location: Location, location: Location,
options: Vec<EnumerationVariant>, variants: Vec<EnumerationVariant>,
} }
#[derive(Debug)] #[derive(Debug)]
pub struct EnumerationVariant { pub struct EnumerationVariant {
location: Location, location: Location,
name: String, name: String,
arguments: Vec<Type>, argument: Option<Type>,
} }
#[derive(Debug)] #[derive(Debug)]
@@ -95,8 +73,10 @@ pub struct StructureDef {
#[derive(Debug)] #[derive(Debug)]
pub struct StructureField { pub struct StructureField {
location: Location,
export: ExportClass,
name: String, name: String,
field_type: Type, field_type: Option<Type>,
} }
#[derive(Debug)] #[derive(Debug)]
@@ -118,7 +98,7 @@ pub struct FunctionArg {
pub struct ValueDef { pub struct ValueDef {
name: String, name: String,
location: Location, location: Location,
value: Value, value: Expression,
} }
#[derive(Debug)] #[derive(Debug)]
@@ -142,7 +122,16 @@ pub struct BindingStmt {
#[derive(Debug)] #[derive(Debug)]
pub enum Expression { pub enum Expression {
Value(Value), Value(ConstantValue),
Reference(Name),
EnumerationValue(Name, Name, Option<Box<Expression>>),
StructureValue(Name, Vec<FieldValue>),
}
#[derive(Debug)]
pub struct FieldValue {
field: Name,
value: Expression,
} }
#[derive(Debug)] #[derive(Debug)]
@@ -160,9 +149,8 @@ impl TypeRestrictions {
#[derive(Debug)] #[derive(Debug)]
pub struct TypeRestriction { pub struct TypeRestriction {
location: Location, constructor: Type,
class: String, arguments: Vec<Type>,
variables: Vec<String>,
} }
#[derive(Debug)] #[derive(Debug)]
@@ -174,9 +162,28 @@ pub enum Type {
Function(Vec<Type>, Box<Type>), Function(Vec<Type>, Box<Type>),
} }
#[derive(Debug)] impl Located for Type {
pub enum Value { fn location(&self) -> Location {
Constant(ConstantValue), match self {
Type::Constructor(l, _) => l.clone(),
Type::Variable(l, _) => l.clone(),
Type::Primitive(l, _) => l.clone(),
Type::Application(t1, ts) => {
let mut result = t1.location();
if let Some(last) = ts.last() {
result = result.extend_to(&last.location());
}
result
}
Type::Function(args, ret) => {
if let Some(first) = args.first() {
first.location().extend_to(&ret.location())
} else {
ret.location()
}
}
}
}
} }
#[derive(Debug)] #[derive(Debug)]
@@ -198,113 +205,3 @@ pub struct IntegerWithBase {
base: Option<u8>, base: Option<u8>,
value: u64, value: u64,
} }
/// Checks that `Parser::parse_constant` recognises integer literals in each
/// supported base, string literals, and character literals.
#[test]
fn can_parse_constants() {
// Each input gets its own lexer and a parser created with file id 0.
let parse_constant = |str| {
let lexer = Lexer::from(str);
let mut result = Parser::new(0, lexer);
result.parse_constant()
};
// Plain decimal: no explicit base is recorded.
assert!(matches!(
parse_constant("16"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: None,
value: 16,
}
))
));
// Hexadecimal prefix: base 16, same numeric value.
assert!(matches!(
parse_constant("0x10"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: Some(16),
value: 16,
}
))
));
// Octal prefix: base 8.
assert!(matches!(
parse_constant("0o20"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: Some(8),
value: 16,
}
))
));
// Binary prefix: base 2.
assert!(matches!(
parse_constant("0b10000"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: Some(2),
value: 16,
}
))
));
// String literal: the surrounding quotes are not part of the value.
assert!(
matches!(parse_constant("\"foo\""), Ok(ConstantValue::String(_, x))
if x == "foo")
);
// Character literal.
assert!(matches!(
parse_constant("'f'"),
Ok(ConstantValue::Character(_, 'f'))
));
}
/// Checks that `Parser::parse_type` builds the expected `Type` tree for
/// constructors, type variables, type applications, and function arrows.
#[test]
fn can_parse_types() {
    // Each input gets its own lexer and a parser created with file id 0.
    let parse_type = |str| {
        let lexer = Lexer::from(str);
        let mut result = Parser::new(0, lexer);
        result.parse_type()
    };

    // A bare constructor parses as an application with no arguments.
    assert!(matches!(
        parse_type("Cons"),
        Ok(Type::Application(cons, empty)) if
            matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") &&
            empty.is_empty()
    ));

    // A lowercase name parses as a type variable.
    assert!(matches!(
        parse_type("cons"),
        Ok(Type::Variable(_, c)) if c == "cons"
    ));

    // A constructor followed by variables parses as an application.
    assert!(matches!(
        parse_type("Cons a b"),
        Ok(Type::Application(a, b))
            if matches!(a.as_ref(), Type::Constructor(_, c) if c == "Cons") &&
               matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)]
                        if b1 == "a" && b2 == "b")
    ));

    // Single-argument function type.
    assert!(matches!(
        parse_type("a -> z"),
        Ok(Type::Function(a, z))
            if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") &&
               matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z")
    ));

    // Two arguments before the arrow.
    assert!(matches!(
        parse_type("a b -> z"),
        Ok(Type::Function(a, z))
            if matches!(a.as_slice(), [Type::Variable(_, a1), Type::Variable(_, b1)]
                        if a1 == "a" && b1 == "b") &&
               matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z")
    ));

    // An application before the arrow becomes a single function argument.
    assert!(matches!(
        parse_type("Cons a b -> z"),
        Ok(Type::Function(a, z))
            if matches!(a.as_slice(), [Type::Application(cons, appargs)]
                        if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") &&
                           matches!(appargs.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)]
                                    if b1 == "a" && b2 == "b")) &&
               matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z")
    ));
}

48
src/syntax/location.rs Normal file
View File

@@ -0,0 +1,48 @@
use codespan_reporting::diagnostic::Label;
use std::cmp::{max, min};
use std::ops::Range;
/// Implemented by items that can report the [`Location`] they cover.
pub trait Located {
/// Returns this item's location as an owned value.
fn location(&self) -> Location;
}
/// A range (`span`) inside the file identified by `file_id`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Location {
// Identifier of the file the span belongs to; handed to codespan labels.
file_id: usize,
// Start..end range covered within that file.
span: Range<usize>,
}
impl Location {
    /// Builds a location covering `span` in the file identified by `file_id`.
    pub fn new(file_id: usize, span: Range<usize>) -> Self {
        Self { file_id, span }
    }

    /// Returns the smallest location covering both `self` and `other`.
    ///
    /// # Panics
    ///
    /// Panics if the two locations have different file ids.
    pub fn extend_to(&self, other: &Location) -> Location {
        assert_eq!(self.file_id, other.file_id);
        let start = min(self.span.start, other.span.start);
        let end = max(self.span.end, other.span.end);
        Location {
            file_id: self.file_id,
            span: start..end,
        }
    }

    /// Consumes this location and widens its span so it also covers `span`.
    /// The file id is left untouched.
    pub fn merge_span(self, span: Range<usize>) -> Location {
        let start = min(self.span.start, span.start);
        let end = max(self.span.end, span.end);
        Location {
            file_id: self.file_id,
            span: start..end,
        }
    }

    /// Returns the identifier of the file this location refers to.
    pub fn file_id(&self) -> usize {
        self.file_id
    }

    /// Returns a copy of the covered range.
    pub fn span(&self) -> Range<usize> {
        Range::clone(&self.span)
    }

    /// Builds a primary diagnostic label for this location.
    pub fn primary_label(&self) -> Label<usize> {
        let range = self.span.clone();
        Label::primary(self.file_id, range)
    }

    /// Builds a secondary diagnostic label for this location.
    pub fn secondary_label(&self) -> Label<usize> {
        let range = self.span.clone();
        Label::secondary(self.file_id, range)
    }
}

60
src/syntax/name.rs Normal file
View File

@@ -0,0 +1,60 @@
use crate::syntax::{Located, Location};
use std::cmp;
use std::fmt;
use std::hash;
use std::sync::atomic::{AtomicU64, Ordering};
// Shared counter; every `Name` takes the next value as its identifier.
static IDENTIFIER_COUNTER: AtomicU64 = AtomicU64::new(0);
/// A name paired with a numeric identifier drawn from `IDENTIFIER_COUNTER`.
///
/// Equality and hashing look only at `identifier`, so two names with the same
/// printable text but different identifiers are distinct.
#[derive(Debug)]
pub struct Name {
// Text shown for this name (see `as_printed` and the `Display` impl).
printable: String,
// Value taken from `IDENTIFIER_COUNTER` when the name was created.
identifier: u64,
// Set by `Name::new`; `None` for names produced by `Name::gensym`.
location: Option<Location>,
}
/// Names are equal exactly when their identifiers match; the printable text
/// and location are not compared.
impl cmp::PartialEq for Name {
    fn eq(&self, rhs: &Self) -> bool {
        let (mine, theirs) = (self.identifier, rhs.identifier);
        mine == theirs
    }
}

impl cmp::Eq for Name {}
/// Hashes only the identifier, matching the `PartialEq` implementation.
impl hash::Hash for Name {
    fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
        hash::Hash::hash(&self.identifier, hasher);
    }
}
impl fmt::Display for Name {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}:{}", self.printable, self.identifier)
}
}
impl Name {
pub fn new<S: ToString>(location: Location, s: S) -> Name {
let my_id = IDENTIFIER_COUNTER.fetch_add(1, Ordering::SeqCst);
Name {
printable: s.to_string(),
identifier: my_id,
location: Some(location),
}
}
pub fn gensym(base: &'static str) -> Name {
let formatted = format!("<{base}>");
let my_id = IDENTIFIER_COUNTER.fetch_add(1, Ordering::SeqCst);
Name {
printable: formatted,
identifier: my_id,
location: None,
}
}
pub fn as_printed(&self) -> &str {
self.printable.as_str()
}
}

View File

@@ -46,9 +46,651 @@ impl<'a> Parser<'a> {
} }
fn to_location(&self, span: Range<usize>) -> Location { fn to_location(&self, span: Range<usize>) -> Location {
Location { Location::new(self.file_id, span)
}
/// Parses definitions until the token stream is exhausted and collects them
/// into a `Module`.
///
/// # Errors
///
/// Propagates any error from reading the next token or from
/// `parse_definition`.
pub fn parse_module(&mut self) -> Result<Module, ParserError> {
    let mut definitions = vec![];

    // `next` consumes the token it returns, so the lookahead used to detect
    // end of input is pushed back before the definition is parsed. Without
    // this, every definition would lose its first token.
    while let Some(lookahead) = self.next()? {
        self.save(lookahead);
        definitions.push(self.parse_definition()?);
    }

    Ok(Module { definitions })
}
/// Parses one definition: an optional export marker, optional type
/// restrictions, and then the definition body.
///
/// The resulting location is the body's location widened to include the span
/// returned by `parse_export_class`.
pub fn parse_definition(&mut self) -> Result<Definition, ParserError> {
    let (export, start_span) = self.parse_export_class()?;
    let type_restrictions = self.parse_type_restrictions()?;
    let definition = self.parse_def()?;

    Ok(Definition {
        location: definition.location().merge_span(start_span),
        export,
        type_restrictions,
        definition,
    })
}
/// Reads one token and decides whether it is the `export` marker.
///
/// Returns `ExportClass::Public` with the marker's span when it is. Otherwise
/// the token is pushed back and `ExportClass::Private` is returned with that
/// token's span. Running out of input yields the error built by `bad_eof`.
fn parse_export_class(&mut self) -> Result<(ExportClass, Range<usize>), ParserError> {
    let upcoming = self.next()?;
    let candidate = upcoming.ok_or_else(|| self.bad_eof("looking for possible export"))?;

    let is_export = match &candidate.token {
        Token::ValueName(word) => word == "export",
        _ => false,
    };
    if is_export {
        return Ok((ExportClass::Public, candidate.span));
    }

    // Not an export marker: remember where it started and hand it back.
    let start = candidate.span.clone();
    self.save(candidate);
    Ok((ExportClass::Private, start))
}
/// Parses an optional `restrict ( ... )` clause.
///
/// Returns an empty set of restrictions when the input is exhausted or when
/// the next token is not the `restrict` keyword (that token is pushed back).
/// Otherwise an open parenthesis, zero or more restrictions, and a close
/// parenthesis are required.
///
/// # Errors
///
/// Returns `ParserError::UnexpectedToken` when either parenthesis is missing,
/// the error built by `bad_eof` when input ends inside the clause, and
/// propagates errors from reading tokens or parsing a restriction.
pub fn parse_type_restrictions(&mut self) -> Result<TypeRestrictions, ParserError> {
    let Some(maybe_restrict) = self.next()? else {
        return Ok(TypeRestrictions::empty());
    };

    if !matches!(maybe_restrict.token, Token::ValueName(ref x) if x == "restrict") {
        self.save(maybe_restrict);
        return Ok(TypeRestrictions::empty());
    }

    let open_paren = self
        .next()?
        .ok_or_else(|| self.bad_eof("Looking for open paren after restrict"))?;

    if !matches!(open_paren.token, Token::OpenParen) {
        return Err(ParserError::UnexpectedToken {
            file_id: self.file_id,
            span: open_paren.span,
            token: open_paren.token,
            expected: "open parenthesis, following the restrict keyword",
        });
    }

    let mut restrictions = vec![];
    while let Some(type_restriction) = self.parse_type_restriction()? {
        restrictions.push(type_restriction);
    }

    // This EOF message previously repeated the "open paren" wording even
    // though the closing parenthesis is what is expected here.
    let close_paren = self
        .next()?
        .ok_or_else(|| self.bad_eof("Looking for close paren after type restrictions"))?;

    if !matches!(close_paren.token, Token::CloseParen) {
        return Err(ParserError::UnexpectedToken {
            file_id: self.file_id,
            span: close_paren.span,
            token: close_paren.token,
            expected: "close parenthesis following type restrictions",
        });
    }

    Ok(TypeRestrictions { restrictions })
}
/// Parses a single type restriction: a constructor (type name or primitive
/// type name) followed by any number of argument types.
///
/// Returns `Ok(None)` when the next token is a close parenthesis or a comma;
/// that token is pushed back for the caller. A comma directly after the
/// restriction is consumed; any other following token is pushed back.
fn parse_type_restriction(&mut self) -> Result<Option<TypeRestriction>, ParserError> {
let maybe_constructor = self
.next()?
.ok_or_else(|| self.bad_eof("Looking for constructor for type restriction"))?;
let constructor = match maybe_constructor.token {
Token::TypeName(str) => {
Type::Constructor(self.to_location(maybe_constructor.span), str)
}
Token::PrimitiveTypeName(str) => {
Type::Primitive(self.to_location(maybe_constructor.span), str)
}
// No restriction starts here: rebuild the token and hand it back.
token @ Token::CloseParen | token @ Token::Comma => {
self.save(LocatedToken {
token,
span: maybe_constructor.span,
});
return Ok(None);
}
weird => {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: maybe_constructor.span,
token: weird,
expected: "Constructor name, comma, or close parenthesis in type restriction",
});
}
};
let mut arguments = vec![];
// Collect argument types until `parse_base_type` fails; the failure itself is discarded.
// NOTE(review): this relies on `parse_base_type` restoring any tokens it
// consumed before returning an error, and it also hides lexer errors — confirm.
while let Ok(t) = self.parse_base_type() {
arguments.push(t);
}
let restriction = TypeRestriction {
constructor,
arguments,
};
// End of input right after a restriction is accepted here.
let Some(maybe_comma) = self.next()? else {
return Ok(Some(restriction));
};
// Swallow a separating comma; anything else goes back for the caller.
match maybe_comma.token {
Token::Comma => {}
_ => self.save(maybe_comma),
}
Ok(Some(restriction))
}
fn parse_def(&mut self) -> Result<Def, ParserError> {
let next = self
.next()?
.ok_or_else(|| self.bad_eof("looking for definition body"))?;
if let Ok(structure) = self.parse_structure() {
return Ok(Def::Structure(structure));
}
if let Ok(enumeration) = self.parse_enumeration() {
return Ok(Def::Enumeration(enumeration));
}
if let Ok(fun_or_val) = self.parse_function_or_value() {
return Ok(fun_or_val);
}
Err(ParserError::UnexpectedToken {
file_id: self.file_id, file_id: self.file_id,
span, span: next.span,
token: next.token,
expected: "'structure', 'enumeration', or a value identifier",
})
}
/// Parses a structure definition: the `structure` keyword, a type name, an
/// open brace, zero or more field definitions, and a close brace.
///
/// The returned location runs from the `structure` keyword to the closing
/// brace.
///
/// # Errors
///
/// Returns `ParserError::UnexpectedToken` when any of the required tokens is
/// missing, the error built by `bad_eof` when input ends early, and propagates
/// errors from reading tokens or parsing fields.
pub fn parse_structure(&mut self) -> Result<StructureDef, ParserError> {
let structure_token = self
.next()?
.ok_or_else(|| self.bad_eof("looking for definition"))?;
// The keyword arrives as an ordinary value name and is matched by text.
if !matches!(structure_token.token, Token::ValueName(ref s) if s == "structure") {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: structure_token.span,
token: structure_token.token,
expected: "the 'structure' keyword",
});
}
let name = self
.next()?
.ok_or_else(|| self.bad_eof("looking for structure name"))?;
let structure_name = match name.token {
Token::TypeName(str) => str,
_ => {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: name.span,
token: name.token,
expected: "a structure name",
});
}
};
let brace = self
.next()?
.ok_or_else(|| self.bad_eof("the open brace after a structure name"))?;
if !matches!(brace.token, Token::OpenBrace) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: brace.span,
token: brace.token,
expected: "the brace after a structure name",
});
}
let mut fields = vec![];
// `parse_field_definition` yields `None` once no further field starts here.
while let Some(field_definition) = self.parse_field_definition()? {
fields.push(field_definition);
}
let brace = self.next()?.ok_or_else(|| {
self.bad_eof("the close brace after at the end of a structure definition")
})?;
if !matches!(brace.token, Token::CloseBrace) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: brace.span,
token: brace.token,
expected: "the brace at the end of a structure definition",
});
}
// Cover everything from the keyword through the closing brace.
let location = self
.to_location(structure_token.span)
.extend_to(&self.to_location(brace.span));
Ok(StructureDef {
name: structure_name,
location,
fields,
})
}
/// Parses one `name: expression` field inside a structure value.
///
/// Returns `Ok(None)` when the next token is not a value name; that token is
/// pushed back. After the expression, a trailing comma is consumed if present;
/// any other token is pushed back for the caller.
///
/// # Errors
///
/// Returns `ParserError::UnexpectedToken` when the field name is not followed
/// by a colon, the error built by `bad_eof` when input ends early, and
/// propagates errors from reading tokens or parsing the expression.
pub fn parse_field_value(&mut self) -> Result<Option<FieldValue>, ParserError> {
    let maybe_name = self
        .next()?
        .ok_or_else(|| self.bad_eof("parsing field definition"))?;

    let field = match maybe_name.token {
        Token::ValueName(x) => Name::new(self.to_location(maybe_name.span), x),
        _ => {
            // Nothing is moved out of the token in this arm, so it can be
            // pushed back directly without cloning it first.
            self.save(maybe_name);
            return Ok(None);
        }
    };

    // Only a colon is accepted here, so the EOF message says exactly that.
    let maybe_colon = self
        .next()?
        .ok_or_else(|| self.bad_eof("looking for colon after field name in constructor"))?;

    if !matches!(maybe_colon.token, Token::Colon) {
        return Err(ParserError::UnexpectedToken {
            file_id: self.file_id,
            span: maybe_colon.span,
            token: maybe_colon.token,
            expected: "colon after field name in constructor",
        });
    }

    let value = self.parse_expression()?;

    let end_token = self.next()?.ok_or_else(|| {
        self.bad_eof("looking for comma or close brace after field definition")
    })?;

    // A separating comma is consumed; anything else is left for the caller.
    if !matches!(end_token.token, Token::Comma) {
        self.save(end_token);
    }

    Ok(Some(FieldValue { field, value }))
}
/// Parses one field of a structure definition: an export class (via
/// `parse_export_class`), a field name, and an optional `: type` annotation.
///
/// Returns `Ok(None)` when the export class is `ExportClass::Private` and the
/// next token is not a value name; that token is pushed back for the caller.
/// A trailing comma is consumed, while a close brace is pushed back.
///
/// # Errors
///
/// Returns `ParserError::UnexpectedToken` when a non-private export class is
/// not followed by a field name, when the name is followed by something other
/// than a colon, comma, or close brace, or when the field is not terminated
/// by a comma or close brace. Returns the `bad_eof` error on early end of
/// input and propagates errors from `parse_type`.
pub fn parse_field_definition(&mut self) -> Result<Option<StructureField>, ParserError> {
    let (export, start) = self.parse_export_class()?;

    let name_token = self
        .next()?
        .ok_or_else(|| self.bad_eof("parsing field definition"))?;
    let name = match name_token.token {
        Token::ValueName(identifier) => identifier,
        _ => {
            self.save(name_token.clone());
            // Without an explicit export class this simply is not a field;
            // with one, a field name is mandatory.
            return match export {
                ExportClass::Private => Ok(None),
                _ => Err(ParserError::UnexpectedToken {
                    file_id: self.file_id,
                    span: name_token.span,
                    token: name_token.token,
                    expected: "a field name",
                }),
            };
        }
    };
    let start_location = self.to_location(start);

    let separator = self.next()?.ok_or_else(|| {
        self.bad_eof("looking for colon, comma, or close brace after field name")
    })?;
    let field_type = match separator.token {
        Token::Colon => Some(self.parse_type()?),
        Token::Comma | Token::CloseBrace => {
            // No annotation; leave the terminator for the check below.
            self.save(separator);
            None
        }
        _ => {
            return Err(ParserError::UnexpectedToken {
                file_id: self.file_id,
                span: separator.span,
                token: separator.token,
                expected: "colon, comma, or close brace after field name",
            });
        }
    };

    let terminator = self.next()?.ok_or_else(|| {
        self.bad_eof("looking for comma or close brace after field definition")
    })?;
    let comma_location = match terminator.token {
        Token::CloseBrace => {
            self.save(terminator);
            None
        }
        Token::Comma => Some(self.to_location(terminator.span)),
        _ => {
            return Err(ParserError::UnexpectedToken {
                file_id: self.file_id,
                span: terminator.span,
                token: terminator.token,
                expected: "looking for comma or close brace after field definition",
            });
        }
    };

    // The field ends at its comma if it has one, otherwise at its type,
    // otherwise at its own name.
    let end_location = match (comma_location, field_type.as_ref()) {
        (Some(comma), _) => comma,
        (None, Some(annotation)) => annotation.location(),
        (None, None) => self.to_location(name_token.span),
    };

    Ok(Some(StructureField {
        location: start_location.extend_to(&end_location),
        export,
        name,
        field_type,
    }))
}
pub fn parse_enumeration(&mut self) -> Result<EnumerationDef, ParserError> {
let enumeration_token = self
.next()?
.ok_or_else(|| self.bad_eof("looking for definition"))?;
if !matches!(enumeration_token.token, Token::ValueName(ref e) if e == "enumeration") {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: enumeration_token.span,
token: enumeration_token.token,
expected: "the 'enumeration' keyword",
});
}
let name = self
.next()?
.ok_or_else(|| self.bad_eof("looking for enumeration name"))?;
let enumeration_name = match name.token {
Token::TypeName(str) => str,
_ => {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: name.span,
token: name.token,
expected: "an enumeration name",
});
}
};
let brace = self
.next()?
.ok_or_else(|| self.bad_eof("the open brace after an enumeration name"))?;
if !matches!(brace.token, Token::OpenBrace) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: brace.span,
token: brace.token,
expected: "the brace after an enumeration name",
});
}
let mut variants = vec![];
while let Some(variant_definition) = self.parse_enum_variant()? {
variants.push(variant_definition);
}
let brace = self.next()?.ok_or_else(|| {
self.bad_eof("the close brace after at the end of an enumeration definition")
})?;
if !matches!(brace.token, Token::CloseBrace) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: brace.span,
token: brace.token,
expected: "the brace at the end of an enumeration definition",
});
}
let location = self
.to_location(enumeration_token.span)
.extend_to(&self.to_location(brace.span));
Ok(EnumerationDef {
name: enumeration_name,
location,
variants,
})
}
/// Parses one variant of an enumeration: a type-style name, optionally
/// followed by a parenthesized type argument.
///
/// Returns `Ok(None)` when the next token is a close brace, which is pushed
/// back for the caller. A trailing comma is consumed; a close brace after
/// the variant is pushed back.
///
/// # Errors
///
/// Returns `ParserError::UnexpectedToken` when the variant name is not a
/// type name, when the argument is not closed by a paren, or when the
/// variant is not followed by a comma or close brace. Returns the `bad_eof`
/// error on early end of input and propagates errors from `parse_type`.
pub fn parse_enum_variant(&mut self) -> Result<Option<EnumerationVariant>, ParserError> {
    let name_token = self
        .next()?
        .ok_or_else(|| self.bad_eof("looking for enumeration name"))?;
    let name = match name_token.token {
        Token::CloseBrace => {
            // End of the variant list.
            self.save(name_token);
            return Ok(None);
        }
        Token::TypeName(variant_name) => variant_name,
        _ => {
            self.save(name_token.clone());
            return Err(ParserError::UnexpectedToken {
                file_id: self.file_id,
                span: name_token.span,
                token: name_token.token,
                expected: "variant name (identifier starting with a capital)",
            });
        }
    };
    let start_location = self.to_location(name_token.span);

    let after_name = self
        .next()?
        .ok_or_else(|| self.bad_eof("trying to understand enumeration variant"))?;
    let (argument, arg_location) = match after_name.token {
        Token::OpenParen => {
            let argument_type = self.parse_type()?;
            let closer = self
                .next()?
                .ok_or_else(|| self.bad_eof("trying to parse a enumeration variant's type"))?;
            if !matches!(closer.token, Token::CloseParen) {
                return Err(ParserError::UnexpectedToken {
                    file_id: self.file_id,
                    span: closer.span,
                    token: closer.token,
                    expected: "close paren to end an enumeration variant's type argument",
                });
            }
            let type_location = argument_type.location();
            (Some(argument_type), type_location)
        }
        _ => {
            // No argument: the variant so far is just its name.
            self.save(after_name);
            (None, start_location.clone())
        }
    };

    let terminator = self.next()?.ok_or_else(|| {
        self.bad_eof("looking for comma or close brace after enumeration variant")
    })?;
    let end_location = match terminator.token {
        Token::CloseBrace => {
            self.save(terminator);
            arg_location
        }
        Token::Comma => self.to_location(terminator.span),
        _ => {
            self.save(terminator.clone());
            return Err(ParserError::UnexpectedToken {
                file_id: self.file_id,
                span: terminator.span,
                token: terminator.token,
                expected: "comma or close brace after enumeration variant",
            });
        }
    };

    Ok(Some(EnumerationVariant {
        name,
        location: start_location.extend_to(&end_location),
        argument,
    }))
}
/// Placeholder for parsing a function or value definition.
///
/// # Panics
///
/// Always panics: the body is `unimplemented!()`.
pub fn parse_function_or_value(&mut self) -> Result<Def, ParserError> {
unimplemented!()
}
pub fn parse_expression(&mut self) -> Result<Expression, ParserError> {
self.parse_base_expression()
}
pub fn parse_base_expression(&mut self) -> Result<Expression, ParserError> {
if let Ok(v) = self.parse_constant() {
return Ok(Expression::Value(v));
}
let next = self
.next()?
.ok_or_else(|| self.bad_eof("looking for an expression"))?;
match next.token {
Token::OpenBrace => unimplemented!(),
Token::OpenParen => {
let inner = self.parse_expression()?;
let hopefully_close = self
.next()?
.ok_or_else(|| self.bad_eof("looking for close paren to finish expression"))?;
if matches!(hopefully_close.token, Token::CloseParen) {
Ok(inner)
} else {
self.save(hopefully_close.clone());
Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: hopefully_close.span,
token: hopefully_close.token,
expected: "close paren after expression",
})
}
}
Token::TypeName(n) | Token::PrimitiveTypeName(n) => {
let type_name = Name::new(self.to_location(next.span), n);
let after_type_name = self.next()?.ok_or_else(|| {
self.bad_eof("looking for colon, open brace, or open paren in constructor")
})?;
match after_type_name.token {
Token::OpenBrace => {
let mut fields = vec![];
while let Some(field) = self.parse_field_value()? {
fields.push(field);
}
let closer = self.next()?.ok_or_else(|| {
self.bad_eof("looking for close brace in structure value")
})?;
if !matches!(closer.token, Token::CloseBrace) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: closer.span,
token: closer.token,
expected: "close brace or comma after field value",
});
}
Ok(Expression::StructureValue(type_name, fields))
}
Token::Colon => {
let second_colon = self.next()?.ok_or_else(|| {
self.bad_eof("looking for second colon in enumeration value")
})?;
if !matches!(second_colon.token, Token::Colon) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: second_colon.span,
token: second_colon.token,
expected: "second colon in enumeration value",
});
}
let vname = self
.next()?
.ok_or_else(|| self.bad_eof("looking for enumeration value name"))?;
let value_name = match vname.token {
Token::TypeName(s) => {
let loc = self.to_location(vname.span);
Name::new(loc, s)
}
_ => {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: vname.span,
token: vname.token,
expected: "enumeration value name",
});
}
};
let arg = if let Some(maybe_paren) = self.next()? {
let expr = self.parse_expression()?;
let tok = self.next()?.ok_or_else(|| {
self.bad_eof("looking for close paren after enum value argument")
})?;
if !matches!(tok.token, Token::CloseParen) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: tok.span,
token: tok.token,
expected: "close paren after enum value argument",
});
}
Some(Box::new(expr))
} else {
None
};
Ok(Expression::EnumerationValue(type_name, value_name, arg))
}
_ => Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: after_type_name.span,
token: after_type_name.token,
expected: "colon, open brace, or open paren in constructor",
}),
}
}
Token::ValueName(n) | Token::PrimitiveValueName(n) => {
let location = self.to_location(next.span);
let name = Name::new(location, n);
Ok(Expression::Reference(name))
}
_ => {
self.save(next.clone());
Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: next.span,
token: next.token,
expected: "some base expression or an open brace",
})
}
} }
} }
@@ -60,12 +702,10 @@ impl<'a> Parser<'a> {
let mut args = Vec::new(); let mut args = Vec::new();
while let Ok(t) = self.parse_type_application() { while let Ok(t) = self.parse_type_application() {
println!("got argument type: {t:?}");
args.push(t); args.push(t);
} }
let Some(maybe_arrow) = self.next()? else { let Some(maybe_arrow) = self.next()? else {
println!("no arrow token");
match args.pop() { match args.pop() {
None => { None => {
return Err(ParserError::UnacceptableEof { return Err(ParserError::UnacceptableEof {
@@ -86,11 +726,10 @@ impl<'a> Parser<'a> {
}; };
if maybe_arrow.token == Token::Arrow { if maybe_arrow.token == Token::Arrow {
println!("found function arrow");
let right = self.parse_function_type()?; let right = self.parse_function_type()?;
Ok(Type::Function(args, Box::new(right))) Ok(Type::Function(args, Box::new(right)))
} else if args.len() == 1 { } else if args.len() == 1 {
println!("found non function arrow token {}", maybe_arrow.token); self.save(maybe_arrow);
Ok(args.pop().expect("length = 1 works")) Ok(args.pop().expect("length = 1 works"))
} else { } else {
self.save(maybe_arrow.clone()); self.save(maybe_arrow.clone());
@@ -113,7 +752,6 @@ impl<'a> Parser<'a> {
Token::TypeName(x) => Type::Constructor(self.to_location(span), x), Token::TypeName(x) => Type::Constructor(self.to_location(span), x),
Token::PrimitiveTypeName(x) => Type::Primitive(self.to_location(span), x), Token::PrimitiveTypeName(x) => Type::Primitive(self.to_location(span), x),
_ => { _ => {
println!("saving {token}");
self.save(LocatedToken { token, span }); self.save(LocatedToken { token, span });
return self.parse_base_type(); return self.parse_base_type();
} }
@@ -136,6 +774,23 @@ impl<'a> Parser<'a> {
Token::TypeName(x) => Ok(Type::Constructor(self.to_location(span), x)), Token::TypeName(x) => Ok(Type::Constructor(self.to_location(span), x)),
Token::PrimitiveTypeName(x) => Ok(Type::Primitive(self.to_location(span), x)), Token::PrimitiveTypeName(x) => Ok(Type::Primitive(self.to_location(span), x)),
Token::ValueName(x) => Ok(Type::Variable(self.to_location(span), x)), Token::ValueName(x) => Ok(Type::Variable(self.to_location(span), x)),
Token::OpenParen => {
let t = self.parse_type()?;
let closer = self
.next()?
.ok_or_else(|| self.bad_eof("close paren in type"))?;
if !matches!(closer.token, Token::CloseParen) {
return Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: closer.span,
token: closer.token,
expected: "close parenthesis to finish a type",
});
}
Ok(t)
}
token => { token => {
self.save(LocatedToken { self.save(LocatedToken {
token: token.clone(), token: token.clone(),
@@ -153,20 +808,32 @@ impl<'a> Parser<'a> {
} }
pub fn parse_constant(&mut self) -> Result<ConstantValue, ParserError> { pub fn parse_constant(&mut self) -> Result<ConstantValue, ParserError> {
let LocatedToken { token, span } = self let maybe_constant = self
.next()? .next()?
.ok_or_else(|| self.bad_eof("looking for a constant"))?; .ok_or_else(|| self.bad_eof("looking for a constant"))?;
match token { match maybe_constant.token {
Token::Integer(iwb) => Ok(ConstantValue::Integer(self.to_location(span), iwb)), Token::Integer(iwb) => Ok(ConstantValue::Integer(
Token::Character(c) => Ok(ConstantValue::Character(self.to_location(span), c)), self.to_location(maybe_constant.span),
Token::String(s) => Ok(ConstantValue::String(self.to_location(span), s)), iwb,
_ => Err(ParserError::UnexpectedToken { )),
file_id: self.file_id, Token::Character(c) => Ok(ConstantValue::Character(
span, self.to_location(maybe_constant.span),
token, c,
expected: "constant value", )),
}), Token::String(s) => Ok(ConstantValue::String(
self.to_location(maybe_constant.span),
s,
)),
_ => {
self.save(maybe_constant.clone());
Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span: maybe_constant.span,
token: maybe_constant.token,
expected: "constant value",
})
}
} }
} }
} }

519
src/syntax/parser_tests.rs Normal file
View File

@@ -0,0 +1,519 @@
use crate::syntax::parse::Parser;
use crate::syntax::tokens::Lexer;
use crate::syntax::*;
/// Checks that `parse_constant` accepts integer literals in decimal, hex,
/// octal, and binary notation, as well as string and character literals.
#[test]
fn constants() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_constant();

    // A plain decimal literal records no base.
    assert!(matches!(
        parse("16"),
        Ok(ConstantValue::Integer(
            _,
            IntegerWithBase {
                base: None,
                value: 16,
            }
        ))
    ));

    // Prefixed literals record their base and all denote the same value.
    assert!(matches!(
        parse("0x10"),
        Ok(ConstantValue::Integer(
            _,
            IntegerWithBase {
                base: Some(16),
                value: 16,
            }
        ))
    ));
    assert!(matches!(
        parse("0o20"),
        Ok(ConstantValue::Integer(
            _,
            IntegerWithBase {
                base: Some(8),
                value: 16,
            }
        ))
    ));
    assert!(matches!(
        parse("0b10000"),
        Ok(ConstantValue::Integer(
            _,
            IntegerWithBase {
                base: Some(2),
                value: 16,
            }
        ))
    ));

    assert!(matches!(
        parse("\"foo\""),
        Ok(ConstantValue::String(_, text)) if text == "foo"
    ));
    assert!(matches!(
        parse("'f'"),
        Ok(ConstantValue::Character(_, 'f'))
    ));
}
/// Checks `parse_type` on bare constructors, type variables, type
/// applications, and function types, including a parenthesized function type.
#[test]
fn types() {
    let parse_type = |str| {
        let lexer = Lexer::from(str);
        let mut result = Parser::new(0, lexer);
        result.parse_type()
    };

    // A lone constructor is an application with no arguments.
    assert!(matches!(
        parse_type("Cons"),
        Ok(Type::Application(cons, empty)) if
            matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") &&
            empty.is_empty()
    ));
    assert!(matches!(
        parse_type("cons"),
        Ok(Type::Variable(_, c)) if c == "cons"
    ));
    assert!(matches!(
        parse_type("Cons a b"),
        Ok(Type::Application(a, b))
            if matches!(a.as_ref(), Type::Constructor(_, c) if c == "Cons") &&
               matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)]
                        if b1 == "a" && b2 == "b")
    ));
    assert!(matches!(
        parse_type("a -> z"),
        Ok(Type::Function(a, z))
            if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") &&
               matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z")
    ));
    // Parentheses around a function type do not change the parsed type.
    assert!(matches!(
        parse_type("(a -> z)"),
        Ok(Type::Function(a, z))
            if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") &&
               matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z")
    ));
    assert!(matches!(
        parse_type("a b -> z"),
        Ok(Type::Function(a, z))
            if matches!(a.as_slice(), [Type::Variable(_, a1), Type::Variable(_, b1)]
                        if a1 == "a" && b1 == "b") &&
               matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z")
    ));
    assert!(matches!(
        parse_type("Cons a b -> z"),
        Ok(Type::Function(a, z))
            if matches!(a.as_slice(), [Type::Application(cons, appargs)]
                        if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") &&
                           matches!(appargs.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)]
                                    if b1 == "a" && b2 == "b")) &&
               matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z")
    ));
}
/// Checks `parse_type_restrictions` on empty, single, and multiple
/// restrictions, with and without a trailing comma, and that a leading comma
/// is rejected.
#[test]
fn type_restrictions() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_type_restrictions();

    assert!(matches!(
        parse("restrict()"),
        Ok(TypeRestrictions { restrictions }) if restrictions.is_empty()
    ));

    // One restriction, without and then with a trailing comma.
    assert!(matches!(
        parse("restrict(Cons a b)"),
        Ok(TypeRestrictions { restrictions })
            if restrictions.len() == 1
                && matches!(&restrictions[0], TypeRestriction { constructor, arguments }
                    if matches!(constructor, Type::Constructor(_, ctor) if ctor == "Cons")
                        && arguments.len() == 2
                        && matches!(&arguments[0], Type::Variable(_, var) if var == "a")
                        && matches!(&arguments[1], Type::Variable(_, var) if var == "b"))
    ));
    assert!(matches!(
        parse("restrict(Cons a b,)"),
        Ok(TypeRestrictions { restrictions })
            if restrictions.len() == 1
                && matches!(&restrictions[0], TypeRestriction { constructor, arguments }
                    if matches!(constructor, Type::Constructor(_, ctor) if ctor == "Cons")
                        && arguments.len() == 2
                        && matches!(&arguments[0], Type::Variable(_, var) if var == "a")
                        && matches!(&arguments[1], Type::Variable(_, var) if var == "b"))
    ));

    // A leading comma is not allowed.
    assert!(parse("restrict(,Cons a b,)").is_err());

    // Two restrictions, without and then with a trailing comma.
    assert!(matches!(
        parse("restrict(Cons a b, Monad m)"),
        Ok(TypeRestrictions { restrictions })
            if restrictions.len() == 2
                && matches!(&restrictions[0], TypeRestriction { constructor, arguments }
                    if matches!(constructor, Type::Constructor(_, ctor) if ctor == "Cons")
                        && arguments.len() == 2
                        && matches!(&arguments[0], Type::Variable(_, var) if var == "a")
                        && matches!(&arguments[1], Type::Variable(_, var) if var == "b"))
                && matches!(&restrictions[1], TypeRestriction { constructor, arguments }
                    if matches!(constructor, Type::Constructor(_, ctor) if ctor == "Monad")
                        && arguments.len() == 1
                        && matches!(&arguments[0], Type::Variable(_, var) if var == "m"))
    ));
    assert!(matches!(
        parse("restrict(Cons a b, Monad m,)"),
        Ok(TypeRestrictions { restrictions })
            if restrictions.len() == 2
                && matches!(&restrictions[0], TypeRestriction { constructor, arguments }
                    if matches!(constructor, Type::Constructor(_, ctor) if ctor == "Cons")
                        && arguments.len() == 2
                        && matches!(&arguments[0], Type::Variable(_, var) if var == "a")
                        && matches!(&arguments[1], Type::Variable(_, var) if var == "b"))
                && matches!(&restrictions[1], TypeRestriction { constructor, arguments }
                    if matches!(constructor, Type::Constructor(_, ctor) if ctor == "Monad")
                        && arguments.len() == 1
                        && matches!(&arguments[0], Type::Variable(_, var) if var == "m"))
    ));
}
/// Checks `parse_field_definition` on untyped and typed fields, on both
/// terminators (comma and close brace), and on an exported field.
#[test]
fn field_definition() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_field_definition();

    // A field name with nothing after it hits end of input.
    assert!(parse("foo").is_err());

    assert!(matches!(
        parse("foo,"),
        Ok(Some(StructureField { name, export: ExportClass::Private, field_type: None, .. }))
            if name == "foo"
    ));
    assert!(matches!(
        parse("foo}"),
        Ok(Some(StructureField { name, export: ExportClass::Private, field_type: None, .. }))
            if name == "foo"
    ));

    assert!(matches!(
        parse("foo: Word8,"),
        Ok(Some(StructureField { name, field_type, .. }))
            if name == "foo"
                && matches!(&field_type, Some(Type::Application(head, args))
                    if matches!(head.as_ref(), Type::Constructor(_, ctor) if ctor == "Word8")
                        && args.is_empty())
    ));
    assert!(matches!(
        parse("foo: Cons a b,"),
        Ok(Some(StructureField { name, field_type, .. }))
            if name == "foo"
                && matches!(&field_type, Some(Type::Application(head, args))
                    if matches!(head.as_ref(), Type::Constructor(_, ctor) if ctor == "Cons")
                        && matches!(&args.as_slice(), &[Type::Variable(_, v1), Type::Variable(_, v2)]
                            if v1 == "a" && v2 == "b"))
    ));
    assert!(matches!(
        parse("foo: a -> b,"),
        Ok(Some(StructureField { name, field_type, .. }))
            if name == "foo"
                && matches!(&field_type, Some(Type::Function(params, ret))
                    if matches!(&params.as_slice(), &[Type::Variable(_, a)] if a == "a")
                        && matches!(ret.as_ref(), Type::Variable(_, b) if b == "b"))
    ));
    assert!(matches!(
        parse("export foo: a -> b,"),
        Ok(Some(StructureField { name, export: ExportClass::Public, field_type, .. }))
            if name == "foo"
                && matches!(&field_type, Some(Type::Function(params, ret))
                    if matches!(&params.as_slice(), &[Type::Variable(_, a)] if a == "a")
                        && matches!(ret.as_ref(), Type::Variable(_, b) if b == "b"))
    ));
}
/// Checks `parse_structure` on malformed headers, an empty body, and bodies
/// with typed and untyped fields, with and without a trailing comma.
#[test]
fn structures() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_structure();

    // Missing name, unterminated input, and a lowercase name are rejected.
    assert!(parse("structure { }").is_err());
    assert!(parse("structure {").is_err());
    assert!(parse("structure foo {}").is_err());

    assert!(matches!(
        parse("structure Foo {}"),
        Ok(StructureDef { name, fields, .. })
            if name == "Foo" && fields.is_empty()
    ));
    assert!(matches!(
        parse("structure Foo { bar }"),
        Ok(StructureDef { name, fields, .. })
            if name == "Foo"
                && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }]
                    if name == "bar" && field_type.is_none())
    ));
    assert!(matches!(
        parse("structure Foo { bar: Word8 }"),
        Ok(StructureDef { name, fields, .. })
            if name == "Foo"
                && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }]
                    if name == "bar"
                        && matches!(field_type, Some(Type::Application(head, args))
                            if matches!(head.as_ref(), Type::Constructor(_, ctor) if ctor == "Word8")
                                && args.is_empty()))
    ));
    assert!(matches!(
        parse("structure Foo { bar: Word8, goo }"),
        Ok(StructureDef { name, fields, .. })
            if name == "Foo"
                && matches!(fields.as_slice(),
                    &[StructureField { ref name, ref field_type, .. },
                      StructureField { name: ref name2, field_type: None, .. }]
                    if name == "bar"
                        && name2 == "goo"
                        && matches!(field_type, Some(Type::Application(head, args))
                            if matches!(head.as_ref(), Type::Constructor(_, ctor) if ctor == "Word8")
                                && args.is_empty()))
    ));

    // The same two-field body, without and then with a trailing comma.
    assert!(matches!(
        parse("structure Foo { bar: b c -> a, goo }"),
        Ok(StructureDef { name, fields, .. })
            if name == "Foo"
                && matches!(fields.as_slice(),
                    &[StructureField { ref name, ref field_type, .. },
                      StructureField { name: ref name2, field_type: None, .. }]
                    if name == "bar"
                        && name2 == "goo"
                        && matches!(field_type, Some(Type::Function(params, ret))
                            if matches!(&params.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)]
                                    if b == "b" && c == "c")
                                && matches!(ret.as_ref(), Type::Variable(_, a) if a == "a")))
    ));
    assert!(matches!(
        parse("structure Foo { bar: b c -> a, goo, }"),
        Ok(StructureDef { name, fields, .. })
            if name == "Foo"
                && matches!(fields.as_slice(),
                    &[StructureField { ref name, ref field_type, .. },
                      StructureField { name: ref name2, field_type: None, .. }]
                    if name == "bar"
                        && name2 == "goo"
                        && matches!(field_type, Some(Type::Function(params, ret))
                            if matches!(&params.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)]
                                    if b == "b" && c == "c")
                                && matches!(ret.as_ref(), Type::Variable(_, a) if a == "a")))
    ));
}
/// Checks `parse_enum_variant` on invalid names, empty input, the
/// end-of-list close brace, and variants with and without a type argument.
#[test]
fn enum_variant() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_enum_variant();

    // Lowercase names, a stray token after the name, and empty input fail.
    assert!(parse("foo").is_err());
    assert!(parse("foo,").is_err());
    assert!(parse("Cons foo,").is_err());
    assert!(parse("").is_err());

    // A close brace means there are no more variants.
    assert!(matches!(parse("}"), Ok(None)));

    assert!(matches!(
        parse("Cons,"),
        Ok(Some(EnumerationVariant { name, argument, .. }))
            if name == "Cons" && argument.is_none()
    ));
    assert!(matches!(
        parse("Cons }"),
        Ok(Some(EnumerationVariant { name, argument, .. }))
            if name == "Cons" && argument.is_none()
    ));
    assert!(matches!(
        parse("Cons, }"),
        Ok(Some(EnumerationVariant { name, argument, .. }))
            if name == "Cons" && argument.is_none()
    ));

    assert!(matches!(
        parse("Cons(Pair a),"),
        Ok(Some(EnumerationVariant { name, ref argument, .. }))
            if name == "Cons"
                && matches!(argument, Some(Type::Application(head, args))
                    if matches!(head.as_ref(), Type::Constructor(_, ctor) if ctor == "Pair")
                        && matches!(&args.as_slice(), &[Type::Variable(_, var)] if var == "a"))
    ));
    assert!(matches!(
        parse("Cons(Pair a) }"),
        Ok(Some(EnumerationVariant { name, ref argument, .. }))
            if name == "Cons"
                && matches!(argument, Some(Type::Application(head, args))
                    if matches!(head.as_ref(), Type::Constructor(_, ctor) if ctor == "Pair")
                        && matches!(&args.as_slice(), &[Type::Variable(_, var)] if var == "a"))
    ));
    assert!(matches!(
        parse("Cons(a b -> c) }"),
        Ok(Some(EnumerationVariant { name, ref argument, .. }))
            if name == "Cons"
                && matches!(argument, Some(Type::Function(params, ret))
                    if matches!(&params.as_slice(), &[Type::Variable(_, a), Type::Variable(_, b)]
                            if a == "a" && b == "b")
                        && matches!(ret.as_ref(), Type::Variable(_, c) if c == "c"))
    ));
}
/// Checks `parse_enumeration` on malformed headers, an empty body, and a
/// two-variant body with and without a trailing comma.
#[test]
fn enumerations() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_enumeration();

    assert!(parse("enumeration { }").is_err());
    assert!(parse("enumeration {").is_err());
    assert!(parse("enumeration").is_err());

    assert!(matches!(
        parse("enumeration Empty { }"),
        Ok(EnumerationDef { name, variants, .. })
            if name == "Empty" && variants.is_empty()
    ));
    assert!(matches!(
        parse("enumeration Alternates { A, B }"),
        Ok(EnumerationDef { name, variants, .. })
            if name == "Alternates"
                && matches!(&variants.as_slice(), &[
                        EnumerationVariant { name: first_name, argument: first_arg, .. },
                        EnumerationVariant { name: second_name, argument: second_arg, .. },
                    ] if first_name == "A" && first_arg.is_none()
                        && second_name == "B" && second_arg.is_none())
    ));
    assert!(matches!(
        parse("enumeration Alternates { A, B, }"),
        Ok(EnumerationDef { name, variants, .. })
            if name == "Alternates"
                && matches!(&variants.as_slice(), &[
                        EnumerationVariant { name: first_name, argument: first_arg, .. },
                        EnumerationVariant { name: second_name, argument: second_arg, .. },
                    ] if first_name == "A" && first_arg.is_none()
                        && second_name == "B" && second_arg.is_none())
    ));
}
/// Checks `parse_expression` on empty input, name references, constants, and
/// parenthesized forms of both.
#[test]
fn expressions() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_expression();

    assert!(parse("").is_err());

    // Parentheses are transparent: `(x)` parses to the same thing as `x`.
    assert!(matches!(
        parse("x"),
        Ok(Expression::Reference(reference)) if reference.as_printed() == "x"
    ));
    assert!(matches!(
        parse("(x)"),
        Ok(Expression::Reference(reference)) if reference.as_printed() == "x"
    ));

    assert!(matches!(
        parse("'c'"),
        Ok(Expression::Value(ConstantValue::Character(_, _)))
    ));
    assert!(matches!(
        parse("\"c\""),
        Ok(Expression::Value(ConstantValue::String(_, _)))
    ));
    assert!(matches!(
        parse("1"),
        Ok(Expression::Value(ConstantValue::Integer(_, _)))
    ));
    assert!(matches!(
        parse("(1)"),
        Ok(Expression::Value(ConstantValue::Integer(_, _)))
    ));
}
/// Checks `parse_expression` on enumeration values: a lowercase variant name
/// is rejected, and variants parse with and without an argument.
#[test]
fn enumeration_values() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_expression();

    assert!(parse("Hello::world").is_err());

    assert!(matches!(
        parse("Hello::World"),
        Ok(Expression::EnumerationValue(type_name, variant, None))
            if type_name.as_printed() == "Hello" && variant.as_printed() == "World"
    ));
    assert!(matches!(
        parse("Hello::World(a)"),
        Ok(Expression::EnumerationValue(type_name, variant, Some(_)))
            if type_name.as_printed() == "Hello" && variant.as_printed() == "World"
    ));
}
/// Checks `parse_expression` on structure values: stray or doubled commas and
/// fields without a value are rejected, and one- and two-field values parse
/// with and without a trailing comma.
#[test]
fn structure_value() {
    let parse = |source| Parser::new(0, Lexer::from(source)).parse_expression();

    assert!(parse("Foo{ , }").is_err());
    assert!(parse("Foo{ foo, }").is_err());
    assert!(parse("Foo{ foo: , }").is_err());
    assert!(parse("Foo{ , foo: 1, }").is_err());

    // One field, without and then with a trailing comma.
    assert!(matches!(
        parse("Foo{ foo: 1 }"),
        Ok(Expression::StructureValue(struct_name, values))
            if struct_name.as_printed() == "Foo"
                && matches!(values.as_slice(), [FieldValue { field, value }]
                    if field.as_printed() == "foo"
                        && matches!(value, Expression::Value(ConstantValue::Integer(_, _))))
    ));
    assert!(matches!(
        parse("Foo{ foo: 1, }"),
        Ok(Expression::StructureValue(struct_name, values))
            if struct_name.as_printed() == "Foo"
                && matches!(values.as_slice(), [FieldValue { field, value }]
                    if field.as_printed() == "foo"
                        && matches!(value, Expression::Value(ConstantValue::Integer(_, _))))
    ));

    // Two fields, without and then with a trailing comma.
    assert!(matches!(
        parse("Foo{ foo: 1, bar: \"foo\" }"),
        Ok(Expression::StructureValue(struct_name, values))
            if struct_name.as_printed() == "Foo"
                && matches!(values.as_slice(),
                    [FieldValue { field: f1, value: v1 }, FieldValue { field: f2, value: v2 }]
                    if f1.as_printed() == "foo"
                        && f2.as_printed() == "bar"
                        && matches!(v1, Expression::Value(ConstantValue::Integer(_, _)))
                        && matches!(v2, Expression::Value(ConstantValue::String(_, _))))
    ));
    assert!(matches!(
        parse("Foo{ foo: 1, bar: \"foo\", }"),
        Ok(Expression::StructureValue(struct_name, values))
            if struct_name.as_printed() == "Foo"
                && matches!(values.as_slice(),
                    [FieldValue { field: f1, value: v1 }, FieldValue { field: f2, value: v2 }]
                    if f1.as_printed() == "foo"
                        && f2.as_printed() == "bar"
                        && matches!(v1, Expression::Value(ConstantValue::Integer(_, _)))
                        && matches!(v2, Expression::Value(ConstantValue::String(_, _))))
    ));

    assert!(parse("Foo{ foo: 1,, bar: \"foo\", }").is_err());
}