Start with hand writing the parser again.

This commit is contained in:
2025-09-06 22:06:21 -07:00
parent 8657c009c8
commit 24e6bf6318
5 changed files with 271 additions and 540 deletions

493
Cargo.lock generated
View File

@@ -2,24 +2,6 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 4 version = 4
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "ascii-canvas"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6"
dependencies = [
"term",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.5.0" version = "1.5.0"
@@ -32,27 +14,9 @@ version = "0.1.0"
dependencies = [ dependencies = [
"codespan", "codespan",
"codespan-reporting", "codespan-reporting",
"lalrpop",
"lalrpop-util",
"logos",
"proptest", "proptest",
"proptest-derive", "proptest-derive",
"thiserror 2.0.14", "thiserror",
]
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec 0.6.3",
] ]
[[package]] [[package]]
@@ -61,15 +25,9 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [ dependencies = [
"bit-vec 0.8.0", "bit-vec",
] ]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]] [[package]]
name = "bit-vec" name = "bit-vec"
version = "0.8.0" version = "0.8.0"
@@ -109,54 +67,6 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "ena"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1"
dependencies = [
"log",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]] [[package]]
name = "errno" name = "errno"
version = "0.3.13" version = "0.3.13"
@@ -173,29 +83,12 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fixedbitset"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "getrandom"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
"wasi 0.11.1+wasi-snapshot-preview1",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.3.3" version = "0.3.3"
@@ -205,63 +98,7 @@ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"r-efi", "r-efi",
"wasi 0.14.2+wasi-0.2.4", "wasi",
]
[[package]]
name = "hashbrown"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
[[package]]
name = "indexmap"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "lalrpop"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca"
dependencies = [
"ascii-canvas",
"bit-set 0.5.3",
"ena",
"itertools",
"lalrpop-util",
"petgraph",
"pico-args",
"regex",
"regex-syntax",
"string_cache",
"term",
"tiny-keccak",
"unicode-xid",
"walkdir",
]
[[package]]
name = "lalrpop-util"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553"
dependencies = [
"regex-automata",
] ]
[[package]] [[package]]
@@ -276,84 +113,12 @@ version = "0.2.175"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
[[package]]
name = "libredox"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
dependencies = [
"bitflags",
"libc",
]
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.9.4" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
[[package]]
name = "lock_api"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "logos"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c"
dependencies = [
"beef",
"fnv",
"lazy_static",
"proc-macro2",
"quote",
"regex-syntax",
"rustc_version",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470"
dependencies = [
"logos-codegen",
]
[[package]]
name = "memchr"
version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.19" version = "0.2.19"
@@ -369,54 +134,6 @@ version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "parking_lot"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets 0.52.6",
]
[[package]]
name = "petgraph"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9"
dependencies = [
"fixedbitset",
"indexmap",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher",
]
[[package]]
name = "pico-args"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315"
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.21" version = "0.2.21"
@@ -426,12 +143,6 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.97" version = "1.0.97"
@@ -447,8 +158,8 @@ version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f"
dependencies = [ dependencies = [
"bit-set 0.8.0", "bit-set",
"bit-vec 0.8.0", "bit-vec",
"bitflags", "bitflags",
"lazy_static", "lazy_static",
"num-traits", "num-traits",
@@ -519,7 +230,7 @@ version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [ dependencies = [
"getrandom 0.3.3", "getrandom",
] ]
[[package]] [[package]]
@@ -531,64 +242,12 @@ dependencies = [
"rand_core", "rand_core",
] ]
[[package]]
name = "redox_syscall"
version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
dependencies = [
"bitflags",
]
[[package]]
name = "redox_users"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
dependencies = [
"getrandom 0.2.16",
"libredox",
"thiserror 1.0.57",
]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]] [[package]]
name = "regex-syntax" name = "regex-syntax"
version = "0.8.5" version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustc_version"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver",
]
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "1.0.8" version = "1.0.8"
@@ -602,12 +261,6 @@ dependencies = [
"windows-sys 0.60.2", "windows-sys 0.60.2",
] ]
[[package]]
name = "rustversion"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
[[package]] [[package]]
name = "rusty-fork" name = "rusty-fork"
version = "0.3.0" version = "0.3.0"
@@ -620,27 +273,6 @@ dependencies = [
"wait-timeout", "wait-timeout",
] ]
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "semver"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.219" version = "1.0.219"
@@ -661,31 +293,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "siphasher"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "string_cache"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
dependencies = [
"new_debug_unreachable",
"once_cell",
"parking_lot",
"phf_shared",
"precomputed-hash",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.104" version = "2.0.104"
@@ -704,23 +311,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
dependencies = [ dependencies = [
"fastrand", "fastrand",
"getrandom 0.3.3", "getrandom",
"once_cell", "once_cell",
"rustix", "rustix",
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "term"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
dependencies = [
"dirs-next",
"rustversion",
"winapi",
]
[[package]] [[package]]
name = "termcolor" name = "termcolor"
version = "1.4.1" version = "1.4.1"
@@ -730,33 +326,13 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "thiserror"
version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b"
dependencies = [
"thiserror-impl 1.0.57",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "2.0.14" version = "2.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e"
dependencies = [ dependencies = [
"thiserror-impl 2.0.14", "thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81"
dependencies = [
"proc-macro2",
"quote",
"syn",
] ]
[[package]] [[package]]
@@ -770,15 +346,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]] [[package]]
name = "unarray" name = "unarray"
version = "0.1.4" version = "0.1.4"
@@ -797,12 +364,6 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "unicode-xid"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
[[package]] [[package]]
name = "wait-timeout" name = "wait-timeout"
version = "0.2.1" version = "0.2.1"
@@ -812,22 +373,6 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.14.2+wasi-0.2.4" version = "0.14.2+wasi-0.2.4"
@@ -837,22 +382,6 @@ dependencies = [
"wit-bindgen-rt", "wit-bindgen-rt",
] ]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.9" version = "0.1.9"
@@ -862,12 +391,6 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.59.0" version = "0.59.0"

View File

@@ -1,10 +1,10 @@
mod error; mod error;
mod parse;
pub mod tokens; pub mod tokens;
#[cfg(test)] #[cfg(test)]
use crate::syntax::error::ParserError; use crate::syntax::error::ParserError;
#[cfg(test)] use crate::syntax::parse::Parser;
use crate::syntax::parser::*;
#[cfg(test)] #[cfg(test)]
use crate::syntax::tokens::Lexer; use crate::syntax::tokens::Lexer;
use codespan_reporting::diagnostic::Label; use codespan_reporting::diagnostic::Label;
@@ -171,7 +171,7 @@ pub enum Type {
Variable(Location, String), Variable(Location, String),
Primitive(Location, String), Primitive(Location, String),
Application(Box<Type>, Vec<Type>), Application(Box<Type>, Vec<Type>),
Function(Box<Type>, Box<Type>), Function(Vec<Type>, Box<Type>),
} }
#[derive(Debug)] #[derive(Debug)]
@@ -202,14 +202,9 @@ pub struct IntegerWithBase {
#[test] #[test]
fn can_parse_constants() { fn can_parse_constants() {
let parse_constant = |str| { let parse_constant = |str| {
let lexer = Lexer::from(str).map(|item| { let lexer = Lexer::from(str);
item.map_err(|e| ParserError::LexerError { let mut result = Parser::new(0, lexer);
file_id: 0, result.parse_constant()
error: e,
})
});
let result = ConstantValueParser::new().parse(0, lexer);
result
}; };
assert!(matches!( assert!(matches!(
@@ -265,17 +260,11 @@ fn can_parse_constants() {
#[test] #[test]
fn can_parse_types() { fn can_parse_types() {
let parse_type = |str| { let parse_type = |str| {
let lexer = Lexer::from(str).map(|item| { let lexer = Lexer::from(str);
item.map_err(|e| ParserError::LexerError { let mut result = Parser::new(0, lexer);
file_id: 0, result.parse_type()
error: e,
})
});
let result = TypeParser::new().parse(0, lexer);
result
}; };
println!("cons result: {:?}", parse_type("Cons"));
assert!(matches!( assert!(matches!(
parse_type("Cons"), parse_type("Cons"),
Ok(Type::Application(cons, empty)) if Ok(Type::Application(cons, empty)) if
@@ -293,6 +282,9 @@ fn can_parse_types() {
matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)]
if b1 == "a" && b2 == "b") if b1 == "a" && b2 == "b")
)); ));
println!("------");
println!("result: {:?}", parse_type("a -> z"));
println!("------");
assert!(matches!( assert!(matches!(
parse_type("a -> z"), parse_type("a -> z"),
Ok(Type::Function(a, z)) Ok(Type::Function(a, z))

View File

@@ -7,6 +7,17 @@ use thiserror::Error;
pub enum ParserError { pub enum ParserError {
#[error("Lexer error at {file_id}: {error}")] #[error("Lexer error at {file_id}: {error}")]
LexerError { file_id: usize, error: LexerError }, LexerError { file_id: usize, error: LexerError },
#[error("Unacceptable end of file at {file_id} while {place}")]
UnacceptableEof { file_id: usize, place: &'static str },
#[error("Unexpected token at {file_id}: expected {expected}, saw {token}")]
UnexpectedToken {
file_id: usize,
span: Range<usize>,
token: Token,
expected: &'static str,
},
} }
#[derive(Clone, Debug, Error, PartialEq)] #[derive(Clone, Debug, Error, PartialEq)]

172
src/syntax/parse.rs Normal file
View File

@@ -0,0 +1,172 @@
use crate::syntax::error::ParserError;
use crate::syntax::tokens::{Lexer, LocatedToken, Token};
use crate::syntax::*;
/// State of the hand-written parser: a token source plus a push-back buffer.
pub struct Parser<'a> {
    /// Identifier of the file being parsed; copied into every location and error produced.
    file_id: usize,
    /// Token source, consulted only when `known_tokens` is empty.
    lexer: Lexer<'a>,
    /// Tokens that were read and then pushed back; the most recently saved one is returned first.
    known_tokens: Vec<LocatedToken>,
}
impl<'a> Parser<'a> {
pub fn new(file_id: usize, lexer: Lexer<'a>) -> Parser<'a> {
Parser {
file_id,
lexer,
known_tokens: vec![],
}
}
/// Get the next token.
pub fn next(&mut self) -> Result<Option<LocatedToken>, ParserError> {
let result = self.known_tokens.pop();
if result.is_some() {
Ok(result)
} else {
self.lexer
.next()
.transpose()
.map_err(|error| ParserError::LexerError {
file_id: self.file_id,
error,
})
}
}
/// Pushes `token` back onto the buffer so that the following call to `next` returns it.
fn save(&mut self, token: LocatedToken) {
    let pending = &mut self.known_tokens;
    pending.push(token);
}
/// Builds the error reported when the input ends while the parser still needs a token.
///
/// `place` describes what the parser was doing and is embedded in the error message.
/// Only `file_id` is read, so a shared borrow is sufficient; every existing
/// `self.bad_eof(..)` call site keeps working.
fn bad_eof(&self, place: &'static str) -> ParserError {
    let file_id = self.file_id;
    ParserError::UnacceptableEof { file_id, place }
}
/// Wraps `span` in a `Location` that also records this parser's `file_id`.
fn to_location(&self, span: Range<usize>) -> Location {
    let file_id = self.file_id;
    Location { file_id, span }
}
pub fn parse_type(&mut self) -> Result<Type, ParserError> {
self.parse_function_type()
}
fn parse_function_type(&mut self) -> Result<Type, ParserError> {
let mut args = Vec::new();
while let Ok(t) = self.parse_type_application() {
println!("got argument type: {t:?}");
args.push(t);
}
let Some(maybe_arrow) = self.next()? else {
println!("no arrow token");
match args.pop() {
None => {
return Err(ParserError::UnacceptableEof {
file_id: self.file_id,
place: "parsing function type or type",
});
}
Some(t) if args.len() == 0 => return Ok(t),
Some(_) => {
return Err(ParserError::UnacceptableEof {
file_id: self.file_id,
place: "looking for '->' in function type",
});
}
}
};
if maybe_arrow.token == Token::Arrow {
println!("found function arrow");
let right = self.parse_function_type()?;
Ok(Type::Function(args, Box::new(right)))
} else if args.len() == 1 {
println!("found non function arrow token {}", maybe_arrow.token);
Ok(args.pop().expect("length = 1 works"))
} else {
self.save(maybe_arrow.clone());
let LocatedToken { token, span } = maybe_arrow;
Err(ParserError::UnexpectedToken {
file_id: self.file_id,
span,
token,
expected: "'->' in function type",
})
}
}
fn parse_type_application(&mut self) -> Result<Type, ParserError> {
let LocatedToken { token, span } =
self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?;
let constructor = match token {
Token::TypeName(x) => Type::Constructor(self.to_location(span), x),
Token::PrimitiveTypeName(x) => Type::Primitive(self.to_location(span), x),
_ => {
println!("saving {token}");
self.save(LocatedToken { token, span });
return self.parse_base_type();
}
};
let mut args = vec![];
while let Ok(next_arg) = self.parse_base_type() {
args.push(next_arg);
}
Ok(Type::Application(Box::new(constructor), args))
}
/// Parses a single type constructor, primitive type, or type variable.
///
/// Any other token is pushed back before `UnexpectedToken` is returned, so a failed
/// attempt does not consume it. End of input yields `UnacceptableEof`.
fn parse_base_type(&mut self) -> Result<Type, ParserError> {
    let Some(LocatedToken { token, span }) = self.next()? else {
        return Err(self.bad_eof("parsing type"));
    };

    // Every accepted token returns from inside the match; only a rejected one falls out.
    let rejected = match token {
        Token::TypeName(name) => return Ok(Type::Constructor(self.to_location(span), name)),
        Token::PrimitiveTypeName(name) => {
            return Ok(Type::Primitive(self.to_location(span), name));
        }
        Token::ValueName(name) => return Ok(Type::Variable(self.to_location(span), name)),
        other => other,
    };

    let restored = LocatedToken {
        token: rejected.clone(),
        span: span.clone(),
    };
    self.save(restored);

    Err(ParserError::UnexpectedToken {
        file_id: self.file_id,
        span,
        token: rejected,
        expected: "type constructor, type variable, or primitive type",
    })
}
/// Parses a single constant: an integer, character, or string literal.
///
/// A token that is not a constant is pushed back before the error is returned, matching
/// `parse_base_type`, so a failed attempt does not consume input the caller may want to
/// parse another way.
///
/// # Errors
/// - `LexerError` when the lexer fails.
/// - `UnacceptableEof` when the input ends before any token.
/// - `UnexpectedToken` when the next token is not a constant.
pub fn parse_constant(&mut self) -> Result<ConstantValue, ParserError> {
    let LocatedToken { token, span } = self
        .next()?
        .ok_or_else(|| self.bad_eof("looking for a constant"))?;

    match token {
        Token::Integer(iwb) => Ok(ConstantValue::Integer(self.to_location(span), iwb)),
        Token::Character(c) => Ok(ConstantValue::Character(self.to_location(span), c)),
        Token::String(s) => Ok(ConstantValue::String(self.to_location(span), s)),
        other => {
            // Restore the rejected token so the caller can still inspect it.
            self.save(LocatedToken {
                token: other.clone(),
                span: span.clone(),
            });
            Err(ParserError::UnexpectedToken {
                file_id: self.file_id,
                span,
                token: other,
                expected: "constant value",
            })
        }
    }
}
}

View File

@@ -2,8 +2,15 @@ use crate::syntax::IntegerWithBase;
use crate::syntax::error::LexerError; use crate::syntax::error::LexerError;
use proptest_derive::Arbitrary; use proptest_derive::Arbitrary;
use std::fmt; use std::fmt;
use std::ops::Range;
use std::str::CharIndices; use std::str::CharIndices;
/// A token paired with the range of the input it was lexed from.
///
/// `Debug` is derived alongside `Clone` so that values of this public type can be
/// printed in diagnostics and test failures.
#[derive(Clone, Debug)]
pub struct LocatedToken {
    /// The token itself.
    pub token: Token,
    /// Start and end offsets of the token in the input, as reported by the lexer.
    pub span: Range<usize>,
}
/// A single token of the input stream; used to help the parsing function over /// A single token of the input stream; used to help the parsing function over
/// more concrete things than bytes. /// more concrete things than bytes.
/// ///
@@ -92,7 +99,6 @@ struct LexerState<'a> {
impl<'a> From<&'a str> for Lexer<'a> { impl<'a> From<&'a str> for Lexer<'a> {
fn from(value: &'a str) -> Self { fn from(value: &'a str) -> Self {
println!("LEXING '{value}'");
Lexer::Working(LexerState { Lexer::Working(LexerState {
stream: value.char_indices(), stream: value.char_indices(),
buffer: None, buffer: None,
@@ -110,7 +116,7 @@ impl<'a> Lexer<'a> {
} }
impl<'a> Iterator for Lexer<'a> { impl<'a> Iterator for Lexer<'a> {
type Item = Result<(usize, Token, usize), LexerError>; type Item = Result<LocatedToken, LexerError>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self { match self {
@@ -124,15 +130,11 @@ impl<'a> Iterator for Lexer<'a> {
} }
Ok(None) => { Ok(None) => {
println!("LEXER DONE");
*self = Lexer::Done(state.stream.offset()); *self = Lexer::Done(state.stream.offset());
None None
} }
Ok(Some((start, token, end))) => { Ok(Some(ltoken)) => Some(Ok(ltoken)),
println!("TOKEN: {:?}", token);
Some(Ok((start, token, end)))
}
}, },
} }
} }
@@ -141,24 +143,26 @@ impl<'a> Iterator for Lexer<'a> {
impl<'a> LexerState<'a> { impl<'a> LexerState<'a> {
fn next_char(&mut self) -> Option<(usize, char)> { fn next_char(&mut self) -> Option<(usize, char)> {
let result = self.buffer.take().or_else(|| self.stream.next()); let result = self.buffer.take().or_else(|| self.stream.next());
println!("next_char() -> {result:?}");
result result
} }
fn stash_char(&mut self, idx: usize, c: char) { fn stash_char(&mut self, idx: usize, c: char) {
println!("stash_char({idx}, {c})");
assert!(self.buffer.is_none()); assert!(self.buffer.is_none());
self.buffer = Some((idx, c)); self.buffer = Some((idx, c));
} }
fn next_token(&mut self) -> Result<Option<(usize, Token, usize)>, LexerError> { fn next_token(&mut self) -> Result<Option<LocatedToken>, LexerError> {
while let Some((token_start_offset, char)) = self.next_char() { while let Some((token_start_offset, char)) = self.next_char() {
if char.is_whitespace() { if char.is_whitespace() {
continue; continue;
} }
let simple_response = let simple_response = |token| {
|token| Ok(Some((token_start_offset, token, self.stream.offset()))); Ok(Some(LocatedToken {
token,
span: token_start_offset..self.stream.offset(),
}))
};
match char { match char {
'(' => return simple_response(Token::OpenParen), '(' => return simple_response(Token::OpenParen),
@@ -219,14 +223,17 @@ impl<'a> LexerState<'a> {
fn starts_with_zero( fn starts_with_zero(
&mut self, &mut self,
token_start_offset: usize, token_start_offset: usize,
) -> Result<Option<(usize, Token, usize)>, LexerError> { ) -> Result<Option<LocatedToken>, LexerError> {
match self.next_char() { match self.next_char() {
None => { None => {
let token = Token::Integer(IntegerWithBase { let token = Token::Integer(IntegerWithBase {
base: None, base: None,
value: 0, value: 0,
}); });
Ok(Some((token_start_offset, token, self.stream.offset()))) Ok(Some(LocatedToken {
token,
span: token_start_offset..self.stream.offset(),
}))
} }
Some((_, 'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0), Some((_, 'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0),
@@ -243,7 +250,10 @@ impl<'a> LexerState<'a> {
base: None, base: None,
value: 0, value: 0,
}); });
Ok(Some((token_start_offset, token, offset))) Ok(Some(LocatedToken {
token,
span: token_start_offset..offset,
}))
} }
} }
} }
@@ -255,7 +265,7 @@ impl<'a> LexerState<'a> {
base: u32, base: u32,
provided_base: Option<u8>, provided_base: Option<u8>,
mut value: u64, mut value: u64,
) -> Result<Option<(usize, Token, usize)>, LexerError> { ) -> Result<Option<LocatedToken>, LexerError> {
let mut end_offset = self.stream.offset(); let mut end_offset = self.stream.offset();
while let Some((offset, c)) = self.next_char() { while let Some((offset, c)) = self.next_char() {
@@ -273,7 +283,10 @@ impl<'a> LexerState<'a> {
value, value,
}); });
Ok(Some((token_start_offset, token, end_offset))) Ok(Some(LocatedToken {
token,
span: token_start_offset..end_offset,
}))
} }
fn parse_identifier( fn parse_identifier(
@@ -282,7 +295,7 @@ impl<'a> LexerState<'a> {
mut identifier: String, mut identifier: String,
mut allowed_character: fn(char) -> bool, mut allowed_character: fn(char) -> bool,
mut builder: fn(String) -> Token, mut builder: fn(String) -> Token,
) -> Result<Option<(usize, Token, usize)>, LexerError> { ) -> Result<Option<LocatedToken>, LexerError> {
let mut end_offset = self.stream.offset(); let mut end_offset = self.stream.offset();
while let Some((offset, c)) = self.next_char() { while let Some((offset, c)) = self.next_char() {
@@ -321,13 +334,16 @@ impl<'a> LexerState<'a> {
} }
} }
Ok(Some((token_start_offset, builder(identifier), end_offset))) Ok(Some(LocatedToken {
token: builder(identifier),
span: token_start_offset..end_offset,
}))
} }
fn starts_with_single( fn starts_with_single(
&mut self, &mut self,
token_start_offset: usize, token_start_offset: usize,
) -> Result<Option<(usize, Token, usize)>, LexerError> { ) -> Result<Option<LocatedToken>, LexerError> {
let Some((_, mut char)) = self.next_char() else { let Some((_, mut char)) = self.next_char() else {
return Err(LexerError::UnfinishedCharacter { return Err(LexerError::UnfinishedCharacter {
span: token_start_offset..self.stream.offset(), span: token_start_offset..self.stream.offset(),
@@ -351,7 +367,10 @@ impl<'a> LexerState<'a> {
}); });
} }
Ok(Some((token_start_offset, Token::Character(char), idx))) Ok(Some(LocatedToken {
token: Token::Character(char),
span: token_start_offset..idx,
}))
} }
fn get_escaped_character(&mut self, token_start_offset: usize) -> Result<char, LexerError> { fn get_escaped_character(&mut self, token_start_offset: usize) -> Result<char, LexerError> {
@@ -425,12 +444,17 @@ impl<'a> LexerState<'a> {
fn starts_with_double( fn starts_with_double(
&mut self, &mut self,
token_start_offset: usize, token_start_offset: usize,
) -> Result<Option<(usize, Token, usize)>, LexerError> { ) -> Result<Option<LocatedToken>, LexerError> {
let mut result = String::new(); let mut result = String::new();
while let Some((idx, char)) = self.next_char() { while let Some((idx, char)) = self.next_char() {
match char { match char {
'"' => return Ok(Some((token_start_offset, Token::String(result), idx))), '"' => {
return Ok(Some(LocatedToken {
token: Token::String(result),
span: token_start_offset..idx,
}));
}
'\\' => result.push(self.get_escaped_character(idx)?), '\\' => result.push(self.get_escaped_character(idx)?),
@@ -446,12 +470,18 @@ impl<'a> LexerState<'a> {
fn starts_with_dash( fn starts_with_dash(
&mut self, &mut self,
token_start_offset: usize, token_start_offset: usize,
) -> Result<Option<(usize, Token, usize)>, LexerError> { ) -> Result<Option<LocatedToken>, LexerError> {
match self.next_char() { match self.next_char() {
None => Ok(Some((token_start_offset, Token::OperatorName("-".into()), token_start_offset))), None => Ok(Some(LocatedToken {
Some((end, '>')) => Ok(Some((token_start_offset, Token::Arrow, end))), token: Token::OperatorName("-".into()),
Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => span: token_start_offset..token_start_offset + 1,
self.parse_identifier( })),
Some((end, '>')) => Ok(Some(LocatedToken {
token: Token::Arrow,
span: token_start_offset..end,
})),
Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => self
.parse_identifier(
token_start_offset, token_start_offset,
format!("-{c}"), format!("-{c}"),
|c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(), |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(),
@@ -459,7 +489,10 @@ impl<'a> LexerState<'a> {
), ),
Some((idx, c)) => { Some((idx, c)) => {
self.stash_char(idx, c); self.stash_char(idx, c);
Ok(Some((token_start_offset, Token::OperatorName("-".into()), idx))) Ok(Some(LocatedToken {
token: Token::OperatorName("-".into()),
span: token_start_offset..idx,
}))
} }
} }
} }
@@ -474,7 +507,7 @@ proptest::proptest! {
let initial_token = tokens.next() let initial_token = tokens.next()
.expect("Can get a token without an error.") .expect("Can get a token without an error.")
.expect("Can get a valid token.") .expect("Can get a valid token.")
.1; .token;
proptest::prop_assert_eq!(token, initial_token); proptest::prop_assert_eq!(token, initial_token);
proptest::prop_assert!(tokens.next().is_none()); proptest::prop_assert!(tokens.next().is_none());
@@ -488,7 +521,7 @@ fn parsed_single_token(s: &str) -> Token {
.next() .next()
.expect(format!("Can get at least one token from {s:?}").as_str()) .expect(format!("Can get at least one token from {s:?}").as_str())
.expect("Can get a valid token.") .expect("Can get a valid token.")
.1; .token;
assert!( assert!(
tokens.next().is_none(), tokens.next().is_none(),
@@ -608,7 +641,7 @@ fn operators_work_as_expected() {
#[test] #[test]
fn can_separate_pieces() { fn can_separate_pieces() {
let mut lexer = Lexer::from("a-b"); let mut lexer = Lexer::from("a-b");
let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::ValueName("a".into())), next_token());
assert_eq!(Some(Token::OperatorName("-".into())), next_token()); assert_eq!(Some(Token::OperatorName("-".into())), next_token());
@@ -616,7 +649,7 @@ fn can_separate_pieces() {
assert_eq!(None, next_token()); assert_eq!(None, next_token());
let mut lexer = Lexer::from("a--b"); let mut lexer = Lexer::from("a--b");
let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::ValueName("a".into())), next_token());
assert_eq!(Some(Token::OperatorName("--".into())), next_token()); assert_eq!(Some(Token::OperatorName("--".into())), next_token());
@@ -624,7 +657,7 @@ fn can_separate_pieces() {
assert_eq!(None, next_token()); assert_eq!(None, next_token());
let mut lexer = Lexer::from("a - -b"); let mut lexer = Lexer::from("a - -b");
let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::ValueName("a".into())), next_token());
assert_eq!(Some(Token::OperatorName("-".into())), next_token()); assert_eq!(Some(Token::OperatorName("-".into())), next_token());