From 24e6bf6318f8fdcfd16bea5f93b08f1af5e09b27 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 6 Sep 2025 22:06:21 -0700 Subject: [PATCH] Start with hand writing the parser again. --- Cargo.lock | 493 +------------------------------------------ src/syntax.rs | 34 ++- src/syntax/error.rs | 11 + src/syntax/parse.rs | 172 +++++++++++++++ src/syntax/tokens.rs | 101 ++++++--- 5 files changed, 271 insertions(+), 540 deletions(-) create mode 100644 src/syntax/parse.rs diff --git a/Cargo.lock b/Cargo.lock index f08dbe5..38cd685 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,24 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "ascii-canvas" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" -dependencies = [ - "term", -] - [[package]] name = "autocfg" version = "1.5.0" @@ -32,27 +14,9 @@ version = "0.1.0" dependencies = [ "codespan", "codespan-reporting", - "lalrpop", - "lalrpop-util", - "logos", "proptest", "proptest-derive", - "thiserror 2.0.14", -] - -[[package]] -name = "beef" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" - -[[package]] -name = "bit-set" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" -dependencies = [ - "bit-vec 0.6.3", + "thiserror", ] [[package]] @@ -61,15 +25,9 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "bit-vec 0.8.0", + "bit-vec", ] -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bit-vec" version = "0.8.0" @@ -109,54 +67,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "dirs-next" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" -dependencies = [ - "cfg-if", - "dirs-sys-next", -] - -[[package]] -name = "dirs-sys-next" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "ena" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1" -dependencies = [ - "log", -] - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - [[package]] name = "errno" version = "0.3.13" @@ -173,29 +83,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "getrandom" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", -] - [[package]] name = "getrandom" version = "0.3.3" @@ -205,63 +98,7 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - -[[package]] -name = "hashbrown" -version = "0.15.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" - -[[package]] -name = "indexmap" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "lalrpop" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca" -dependencies = [ - "ascii-canvas", - "bit-set 0.5.3", - "ena", - "itertools", - "lalrpop-util", - "petgraph", - "pico-args", - "regex", - "regex-syntax", - "string_cache", - "term", - "tiny-keccak", - "unicode-xid", - "walkdir", -] - -[[package]] -name = "lalrpop-util" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" -dependencies = [ - "regex-automata", + "wasi", ] [[package]] @@ -276,84 +113,12 @@ version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" -[[package]] -name = "libredox" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" -dependencies = [ - "bitflags", - "libc", -] - [[package]] name = "linux-raw-sys" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" -[[package]] -name = "lock_api" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" - -[[package]] -name = "logos" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" -dependencies = [ - "logos-derive", -] - -[[package]] -name = "logos-codegen" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" -dependencies = [ - "beef", - "fnv", - "lazy_static", - "proc-macro2", - "quote", - "regex-syntax", - "rustc_version", - "syn", -] - -[[package]] -name = "logos-derive" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" -dependencies = [ - "logos-codegen", -] - -[[package]] -name = "memchr" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" - -[[package]] -name = "new_debug_unreachable" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" - [[package]] name = "num-traits" version = "0.2.19" @@ -369,54 +134,6 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -[[package]] -name = "parking_lot" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.52.6", -] - -[[package]] -name = "petgraph" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" -dependencies = [ - "fixedbitset", - "indexmap", -] - -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pico-args" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -426,12 +143,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - [[package]] name = "proc-macro2" version = "1.0.97" @@ -447,8 +158,8 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" dependencies = [ - "bit-set 0.8.0", - "bit-vec 0.8.0", + "bit-set", + "bit-vec", "bitflags", "lazy_static", "num-traits", @@ -519,7 +230,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom", ] [[package]] @@ -531,64 +242,12 @@ dependencies = [ "rand_core", ] -[[package]] -name = "redox_syscall" -version = "0.5.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" -dependencies = [ - "bitflags", -] - -[[package]] -name = "redox_users" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" -dependencies = [ - "getrandom 0.2.16", - "libredox", - "thiserror 1.0.57", -] - -[[package]] -name = "regex" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - [[package]] name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" -[[package]] -name = "rustc_version" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" -dependencies = [ - "semver", -] - [[package]] name = "rustix" version = "1.0.8" @@ -602,12 +261,6 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "rustversion" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" - [[package]] name = "rusty-fork" version = "0.3.0" @@ -620,27 +273,6 @@ dependencies = [ "wait-timeout", ] -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" - [[package]] name = "serde" version = "1.0.219" @@ -661,31 +293,6 @@ dependencies = [ "syn", ] -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "string_cache" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" -dependencies = [ - "new_debug_unreachable", - "once_cell", - "parking_lot", - "phf_shared", - "precomputed-hash", -] - [[package]] name = "syn" version = "2.0.104" @@ -704,23 +311,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom", "once_cell", "rustix", "windows-sys 0.59.0", ] -[[package]] -name = "term" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" -dependencies = [ - "dirs-next", - "rustversion", - "winapi", -] - [[package]] name = "termcolor" version = "1.4.1" @@ -730,33 +326,13 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "thiserror" -version = "1.0.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" -dependencies = [ - "thiserror-impl 1.0.57", -] - [[package]] name = "thiserror" version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" dependencies = [ - "thiserror-impl 2.0.14", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "thiserror-impl", ] [[package]] @@ -770,15 +346,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "unarray" version = "0.1.4" @@ -797,12 +364,6 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" -[[package]] -name = "unicode-xid" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" - [[package]] name = "wait-timeout" version = "0.2.1" @@ -812,22 +373,6 @@ dependencies = [ "libc", ] -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" @@ -837,22 +382,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.9" @@ -862,12 +391,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.59.0" diff --git a/src/syntax.rs b/src/syntax.rs index 12ebc45..dc3f7c4 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,10 +1,10 @@ mod error; +mod parse; pub mod tokens; #[cfg(test)] use crate::syntax::error::ParserError; -#[cfg(test)] -use crate::syntax::parser::*; +use crate::syntax::parse::Parser; #[cfg(test)] use crate::syntax::tokens::Lexer; use codespan_reporting::diagnostic::Label; @@ -171,7 +171,7 @@ pub enum Type { Variable(Location, String), Primitive(Location, String), Application(Box, Vec), - Function(Box, Box), + Function(Vec, Box), } #[derive(Debug)] @@ -202,14 +202,9 @@ pub struct IntegerWithBase { #[test] fn can_parse_constants() { let parse_constant = |str| { - let lexer = Lexer::from(str).map(|item| { - item.map_err(|e| ParserError::LexerError { - file_id: 0, - error: e, - }) - }); - let result = ConstantValueParser::new().parse(0, lexer); - result + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_constant() }; assert!(matches!( @@ -265,20 +260,14 @@ fn can_parse_constants() { #[test] fn can_parse_types() { let parse_type = |str| { - let lexer = Lexer::from(str).map(|item| { - item.map_err(|e| ParserError::LexerError { - file_id: 0, - error: e, - }) - }); - let result = TypeParser::new().parse(0, lexer); - result + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_type() }; - println!("cons result: {:?}", parse_type("Cons")); assert!(matches!( parse_type("Cons"), - Ok(Type::Application(cons, empty)) if + Ok(Type::Application(cons, empty)) if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && empty.is_empty() )); @@ -293,6 +282,9 @@ fn can_parse_types() { matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] if b1 == "a" && b2 == "b") )); + println!("------"); + println!("result: {:?}", parse_type("a -> z")); + println!("------"); assert!(matches!( parse_type("a -> z"), Ok(Type::Function(a, z)) diff --git a/src/syntax/error.rs b/src/syntax/error.rs index eccef47..35924b9 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -7,6 +7,17 @@ use thiserror::Error; pub enum ParserError { #[error("Lexer error at {file_id}: {error}")] LexerError { file_id: usize, error: LexerError }, + + #[error("Unacceptable end of file at {file_id} while {place}")] + UnacceptableEof { file_id: usize, place: &'static str }, + + #[error("Unexpected token at {file_id}: expected {expected}, saw {token}")] + UnexpectedToken { + file_id: usize, + span: Range, + token: Token, + expected: &'static str, + }, } #[derive(Clone, Debug, Error, PartialEq)] diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs new file mode 100644 index 0000000..983bde7 --- /dev/null +++ b/src/syntax/parse.rs @@ -0,0 +1,172 @@ +use crate::syntax::error::ParserError; +use crate::syntax::tokens::{Lexer, LocatedToken, Token}; +use crate::syntax::*; + +pub struct Parser<'a> { + file_id: usize, + lexer: Lexer<'a>, + known_tokens: Vec, +} + +impl<'a> Parser<'a> { + pub fn new(file_id: usize, lexer: Lexer<'a>) -> Parser<'a> { + Parser { + file_id, + lexer, + known_tokens: vec![], + } + } + + /// Get the next token. + pub fn next(&mut self) -> Result, ParserError> { + let result = self.known_tokens.pop(); + + if result.is_some() { + Ok(result) + } else { + self.lexer + .next() + .transpose() + .map_err(|error| ParserError::LexerError { + file_id: self.file_id, + error, + }) + } + } + + fn save(&mut self, token: LocatedToken) { + self.known_tokens.push(token) + } + + fn bad_eof(&mut self, place: &'static str) -> ParserError { + ParserError::UnacceptableEof { + file_id: self.file_id, + place, + } + } + + fn to_location(&self, span: Range) -> Location { + Location { + file_id: self.file_id, + span, + } + } + + pub fn parse_type(&mut self) -> Result { + self.parse_function_type() + } + + fn parse_function_type(&mut self) -> Result { + let mut args = Vec::new(); + + while let Ok(t) = self.parse_type_application() { + println!("got argument type: {t:?}"); + args.push(t); + } + + let Some(maybe_arrow) = self.next()? else { + println!("no arrow token"); + match args.pop() { + None => { + return Err(ParserError::UnacceptableEof { + file_id: self.file_id, + place: "parsing function type or type", + }); + } + + Some(t) if args.len() == 0 => return Ok(t), + + Some(_) => { + return Err(ParserError::UnacceptableEof { + file_id: self.file_id, + place: "looking for '->' in function type", + }); + } + } + }; + + if maybe_arrow.token == Token::Arrow { + println!("found function arrow"); + let right = self.parse_function_type()?; + Ok(Type::Function(args, Box::new(right))) + } else if args.len() == 1 { + println!("found non function arrow token {}", maybe_arrow.token); + Ok(args.pop().expect("length = 1 works")) + } else { + self.save(maybe_arrow.clone()); + let LocatedToken { token, span } = maybe_arrow; + + Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span, + token, + expected: "'->' in function type", + }) + } + } + + fn parse_type_application(&mut self) -> Result { + let LocatedToken { token, span } = + self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; + + let constructor = match token { + Token::TypeName(x) => Type::Constructor(self.to_location(span), x), + Token::PrimitiveTypeName(x) => Type::Primitive(self.to_location(span), x), + _ => { + println!("saving {token}"); + self.save(LocatedToken { token, span }); + return self.parse_base_type(); + } + }; + + let mut args = vec![]; + + while let Ok(next_arg) = self.parse_base_type() { + args.push(next_arg); + } + + Ok(Type::Application(Box::new(constructor), args)) + } + + fn parse_base_type(&mut self) -> Result { + let LocatedToken { token, span } = + self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; + + match token { + Token::TypeName(x) => Ok(Type::Constructor(self.to_location(span), x)), + Token::PrimitiveTypeName(x) => Ok(Type::Primitive(self.to_location(span), x)), + Token::ValueName(x) => Ok(Type::Variable(self.to_location(span), x)), + token => { + self.save(LocatedToken { + token: token.clone(), + span: span.clone(), + }); + + Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span, + token, + expected: "type constructor, type variable, or primitive type", + }) + } + } + } + + pub fn parse_constant(&mut self) -> Result { + let LocatedToken { token, span } = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a constant"))?; + + match token { + Token::Integer(iwb) => Ok(ConstantValue::Integer(self.to_location(span), iwb)), + Token::Character(c) => Ok(ConstantValue::Character(self.to_location(span), c)), + Token::String(s) => Ok(ConstantValue::String(self.to_location(span), s)), + _ => Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span, + token, + expected: "constant value", + }), + } + } +} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index c9cd846..08f5885 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -2,8 +2,15 @@ use crate::syntax::IntegerWithBase; use crate::syntax::error::LexerError; use proptest_derive::Arbitrary; use std::fmt; +use std::ops::Range; use std::str::CharIndices; +#[derive(Clone)] +pub struct LocatedToken { + pub token: Token, + pub span: Range, +} + /// A single token of the input stream; used to help the parsing function over /// more concrete things than bytes. /// @@ -92,7 +99,6 @@ struct LexerState<'a> { impl<'a> From<&'a str> for Lexer<'a> { fn from(value: &'a str) -> Self { - println!("LEXING '{value}'"); Lexer::Working(LexerState { stream: value.char_indices(), buffer: None, @@ -110,7 +116,7 @@ impl<'a> Lexer<'a> { } impl<'a> Iterator for Lexer<'a> { - type Item = Result<(usize, Token, usize), LexerError>; + type Item = Result; fn next(&mut self) -> Option { match self { @@ -124,15 +130,11 @@ impl<'a> Iterator for Lexer<'a> { } Ok(None) => { - println!("LEXER DONE"); *self = Lexer::Done(state.stream.offset()); None } - Ok(Some((start, token, end))) => { - println!("TOKEN: {:?}", token); - Some(Ok((start, token, end))) - } + Ok(Some(ltoken)) => Some(Ok(ltoken)), }, } } @@ -141,24 +143,26 @@ impl<'a> Iterator for Lexer<'a> { impl<'a> LexerState<'a> { fn next_char(&mut self) -> Option<(usize, char)> { let result = self.buffer.take().or_else(|| self.stream.next()); - println!("next_char() -> {result:?}"); result } fn stash_char(&mut self, idx: usize, c: char) { - println!("stash_char({idx}, {c})"); assert!(self.buffer.is_none()); self.buffer = Some((idx, c)); } - fn next_token(&mut self) -> Result, LexerError> { + fn next_token(&mut self) -> Result, LexerError> { while let Some((token_start_offset, char)) = self.next_char() { if char.is_whitespace() { continue; } - let simple_response = - |token| Ok(Some((token_start_offset, token, self.stream.offset()))); + let simple_response = |token| { + Ok(Some(LocatedToken { + token, + span: token_start_offset..self.stream.offset(), + })) + }; match char { '(' => return simple_response(Token::OpenParen), @@ -219,14 +223,17 @@ impl<'a> LexerState<'a> { fn starts_with_zero( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { match self.next_char() { None => { let token = Token::Integer(IntegerWithBase { base: None, value: 0, }); - Ok(Some((token_start_offset, token, self.stream.offset()))) + Ok(Some(LocatedToken { + token, + span: token_start_offset..self.stream.offset(), + })) } Some((_, 'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0), @@ -243,7 +250,10 @@ impl<'a> LexerState<'a> { base: None, value: 0, }); - Ok(Some((token_start_offset, token, offset))) + Ok(Some(LocatedToken { + token, + span: token_start_offset..offset, + })) } } } @@ -255,7 +265,7 @@ impl<'a> LexerState<'a> { base: u32, provided_base: Option, mut value: u64, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let mut end_offset = self.stream.offset(); while let Some((offset, c)) = self.next_char() { @@ -273,7 +283,10 @@ impl<'a> LexerState<'a> { value, }); - Ok(Some((token_start_offset, token, end_offset))) + Ok(Some(LocatedToken { + token, + span: token_start_offset..end_offset, + })) } fn parse_identifier( @@ -282,7 +295,7 @@ impl<'a> LexerState<'a> { mut identifier: String, mut allowed_character: fn(char) -> bool, mut builder: fn(String) -> Token, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let mut end_offset = self.stream.offset(); while let Some((offset, c)) = self.next_char() { @@ -321,13 +334,16 @@ impl<'a> LexerState<'a> { } } - Ok(Some((token_start_offset, builder(identifier), end_offset))) + Ok(Some(LocatedToken { + token: builder(identifier), + span: token_start_offset..end_offset, + })) } fn starts_with_single( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let Some((_, mut char)) = self.next_char() else { return Err(LexerError::UnfinishedCharacter { span: token_start_offset..self.stream.offset(), @@ -351,7 +367,10 @@ impl<'a> LexerState<'a> { }); } - Ok(Some((token_start_offset, Token::Character(char), idx))) + Ok(Some(LocatedToken { + token: Token::Character(char), + span: token_start_offset..idx, + })) } fn get_escaped_character(&mut self, token_start_offset: usize) -> Result { @@ -425,12 +444,17 @@ impl<'a> LexerState<'a> { fn starts_with_double( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let mut result = String::new(); while let Some((idx, char)) = self.next_char() { match char { - '"' => return Ok(Some((token_start_offset, Token::String(result), idx))), + '"' => { + return Ok(Some(LocatedToken { + token: Token::String(result), + span: token_start_offset..idx, + })); + } '\\' => result.push(self.get_escaped_character(idx)?), @@ -446,12 +470,18 @@ impl<'a> LexerState<'a> { fn starts_with_dash( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { match self.next_char() { - None => Ok(Some((token_start_offset, Token::OperatorName("-".into()), token_start_offset))), - Some((end, '>')) => Ok(Some((token_start_offset, Token::Arrow, end))), - Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => - self.parse_identifier( + None => Ok(Some(LocatedToken { + token: Token::OperatorName("-".into()), + span: token_start_offset..token_start_offset + 1, + })), + Some((end, '>')) => Ok(Some(LocatedToken { + token: Token::Arrow, + span: token_start_offset..end, + })), + Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => self + .parse_identifier( token_start_offset, format!("-{c}"), |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(), @@ -459,7 +489,10 @@ impl<'a> LexerState<'a> { ), Some((idx, c)) => { self.stash_char(idx, c); - Ok(Some((token_start_offset, Token::OperatorName("-".into()), idx))) + Ok(Some(LocatedToken { + token: Token::OperatorName("-".into()), + span: token_start_offset..idx, + })) } } } @@ -474,7 +507,7 @@ proptest::proptest! { let initial_token = tokens.next() .expect("Can get a token without an error.") .expect("Can get a valid token.") - .1; + .token; proptest::prop_assert_eq!(token, initial_token); proptest::prop_assert!(tokens.next().is_none()); @@ -488,7 +521,7 @@ fn parsed_single_token(s: &str) -> Token { .next() .expect(format!("Can get at least one token from {s:?}").as_str()) .expect("Can get a valid token.") - .1; + .token; assert!( tokens.next().is_none(), @@ -608,7 +641,7 @@ fn operators_work_as_expected() { #[test] fn can_separate_pieces() { let mut lexer = Lexer::from("a-b"); - let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::OperatorName("-".into())), next_token()); @@ -616,7 +649,7 @@ fn can_separate_pieces() { assert_eq!(None, next_token()); let mut lexer = Lexer::from("a--b"); - let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::OperatorName("--".into())), next_token()); @@ -624,7 +657,7 @@ fn can_separate_pieces() { assert_eq!(None, next_token()); let mut lexer = Lexer::from("a - -b"); - let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::OperatorName("-".into())), next_token());