From a663d8f1fb00694c1cb041898f26132eae53fc32 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 9 Aug 2025 13:47:08 -0700 Subject: [PATCH 01/33] Start a Rust implementation, which is broken with gitignore. --- .gitignore | 5 + Cargo.lock | 1044 +++++++++++++++++++++++++++++++++++++ Cargo.toml | 16 + build.rs | 5 + src/bin/bangc.rs | 1 + src/lib.rs | 1 + src/syntax.rs | 251 +++++++++ src/syntax/error.rs | 116 +++++ src/syntax/parser.lalrpop | 39 ++ src/syntax/tokens.rs | 609 ++++++++++++++++++++++ 10 files changed, 2087 insertions(+) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 build.rs create mode 100644 src/bin/bangc.rs create mode 100644 src/lib.rs create mode 100644 src/syntax.rs create mode 100644 src/syntax/error.rs create mode 100644 src/syntax/parser.lalrpop create mode 100644 src/syntax/tokens.rs diff --git a/.gitignore b/.gitignore index b1604b6..85f98e5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,8 @@ hsrc/Syntax/Lexer.hs hsrc/Syntax/Parser.hs bang + + +# Added by cargo + +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..f08dbe5 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1044 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "ascii-canvas" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +dependencies = [ + "term", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bang" +version = "0.1.0" +dependencies = [ + "codespan", + "codespan-reporting", + "lalrpop", + "lalrpop-util", + "logos", + "proptest", + "proptest-derive", + "thiserror 2.0.14", +] + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.9.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "codespan" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e4b418d52c9206820a56fc1aa28db73d67e346ba8ba6aa90987e8d6becef7e4" +dependencies = [ + "codespan-reporting", + "serde", +] + +[[package]] +name = "codespan-reporting" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" +dependencies = [ + "serde", + "termcolor", + "unicode-width", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "ena" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1" +dependencies = [ + "log", 
+] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "indexmap" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "lalrpop" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca" +dependencies = [ + "ascii-canvas", + "bit-set 0.5.3", + "ena", + "itertools", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax", + "string_cache", + "term", + "tiny-keccak", + "unicode-xid", + "walkdir", +] + +[[package]] +name = "lalrpop-util" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags", + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "logos" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = 
"parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "petgraph" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro2" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proptest" +version = "1.7.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" +dependencies = [ + "bit-set 0.8.0", + "bit-vec 0.8.0", + "bitflags", + "lazy_static", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "proptest-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "095a99f75c69734802359b682be8daaf8980296731f6470434ea2c652af1dd30" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.3", +] + +[[package]] 
+name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.57", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +dependencies = [ + "bitflags", + "errno", + "libc", + 
"linux-raw-sys", + "windows-sys 0.60.2", +] + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +dependencies = [ + "thiserror-impl 1.0.57", +] + +[[package]] +name = "thiserror" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" +dependencies = [ + "thiserror-impl 2.0.14", +] + +[[package]] +name = "thiserror-impl" +version = 
"1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] 
+ +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1848442 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "bang" +version = "0.1.0" +edition = "2024" + +[dependencies] +codespan = "0.12.0" +codespan-reporting = "0.12.0" +lalrpop-util = "0.20.2" +logos = "0.15.1" +proptest = "1.7.0" +proptest-derive = "0.6.0" +thiserror = "2.0.12" + +[build-dependencies] +lalrpop = "0.20.2" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..23c7d3f --- /dev/null +++ b/build.rs @@ -0,0 +1,5 @@ +extern crate lalrpop; + +fn main() { + lalrpop::process_root().unwrap(); +} diff --git a/src/bin/bangc.rs b/src/bin/bangc.rs new file mode 100644 index 0000000..f328e4d --- /dev/null +++ b/src/bin/bangc.rs @@ -0,0 +1 @@ +fn main() {} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..4a39d2c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod syntax; diff --git a/src/syntax.rs b/src/syntax.rs new 
file mode 100644 index 0000000..a4701b6 --- /dev/null +++ b/src/syntax.rs @@ -0,0 +1,251 @@ +use lalrpop_util::lalrpop_mod; + +mod error; +lalrpop_mod!( + #[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)] + parser, + "/syntax/parser.rs" +); +pub mod tokens; + +#[cfg(test)] +use crate::syntax::error::ParserError; +#[cfg(test)] +use crate::syntax::parser::*; +#[cfg(test)] +use crate::syntax::tokens::Lexer; +use codespan_reporting::diagnostic::Label; +use proptest_derive::Arbitrary; +use std::cmp::{max, min}; +use std::fmt::Debug; +use std::ops::Range; + +#[derive(Debug)] +pub struct Location { + file_id: usize, + span: Range, +} + +impl Location { + pub fn new(file_id: usize, span: Range) -> Self { + Location { file_id, span } + } + + pub fn extend_to(&self, other: &Location) -> Location { + assert_eq!(self.file_id, other.file_id); + Location { + file_id: self.file_id, + span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end), + } + } + + pub fn primary_label(&self) -> Label { + Label::primary(self.file_id, self.span.clone()) + } + + pub fn secondary_label(&self) -> Label { + Label::secondary(self.file_id, self.span.clone()) + } +} + +pub struct Module { + definitions: Vec, +} + +pub struct Definition { + location: Location, + export: ExportClass, + type_restrictions: TypeRestrictions, + definition: Def, +} + +pub enum Def { + Enumeration(EnumerationDef), + Structure(StructureDef), + Function(FunctionDef), + Value(ValueDef), +} + +impl Def { + fn location(&self) -> &Location { + match self { + Def::Enumeration(def) => &def.location, + Def::Structure(def) => &def.location, + Def::Function(def) => &def.location, + Def::Value(def) => &def.location, + } + } +} + +pub struct EnumerationDef { + location: Location, + options: Vec, +} + +pub struct EnumerationVariant { + location: Location, + name: String, + arguments: Vec, +} + +pub struct StructureDef { + name: String, + location: Location, + fields: Vec, +} + +pub struct 
StructureField { + name: String, + field_type: Type, +} + +pub struct FunctionDef { + name: String, + location: Location, + arguments: Vec, + return_type: Option, + body: Vec, +} + +pub struct FunctionArg { + name: String, + arg_type: Option, +} + +pub struct ValueDef { + name: String, + location: Location, + value: Value, +} + +pub enum ExportClass { + Public, + Private, +} + +pub enum Statement { + Binding(BindingStmt), +} + +pub struct BindingStmt { + location: Location, + mutable: bool, + variable: String, + value: Expression, +} + +pub enum Expression { + Value(Value), +} + +pub struct TypeRestrictions { + restrictions: Vec, +} + +impl TypeRestrictions { + fn empty() -> Self { + TypeRestrictions { + restrictions: vec![], + } + } +} + +pub struct TypeRestriction { + location: Location, + class: String, + variables: Vec, +} + +pub enum Type { + Constructor(Location, String), + Variable(Location, String), + Primitive(Location, String), + Application(Box, Vec), + Function(Vec, Box), +} + +pub enum Value { + Constant(ConstantValue), +} + +pub enum ConstantValue { + Integer(Location, IntegerWithBase), + Character(Location, char), + String(Location, String), +} + +#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] +pub struct IntegerWithBase { + #[proptest(strategy = "proptest::prop_oneof![ \ + proptest::strategy::Just(None), \ + proptest::strategy::Just(Some(2)), \ + proptest::strategy::Just(Some(8)), \ + proptest::strategy::Just(Some(10)), \ + proptest::strategy::Just(Some(16)), \ + ]")] + base: Option, + value: u64, +} + +#[test] +fn can_parse_constants() { + let parse_constant = |str| { + let lexer = Lexer::from(str).map(|item| { + item.map_err(|e| ParserError::LexerError { + file_id: 0, + error: e, + }) + }); + let result = ConstantValueParser::new().parse(0, lexer); + result + }; + + assert!(matches!( + parse_constant("16"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: None, + value: 16, + } + )) + )); + assert!(matches!( + 
parse_constant("0x10"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(16), + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0o20"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(8), + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0b10000"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(2), + value: 16, + } + )) + )); + assert!( + matches!(parse_constant("\"foo\""), Ok(ConstantValue::String(_, x)) + if x == "foo") + ); + assert!(matches!( + parse_constant("'f'"), + Ok(ConstantValue::Character(_, 'f')) + )); +} diff --git a/src/syntax/error.rs b/src/syntax/error.rs new file mode 100644 index 0000000..eccef47 --- /dev/null +++ b/src/syntax/error.rs @@ -0,0 +1,116 @@ +//use codespan_reporting::diagnostic::{Diagnostic, Label}; +use crate::syntax::tokens::Token; +use std::ops::Range; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ParserError { + #[error("Lexer error at {file_id}: {error}")] + LexerError { file_id: usize, error: LexerError }, +} + +#[derive(Clone, Debug, Error, PartialEq)] +pub enum LexerError { + #[error("Illegal control character in input stream at offset {offset}")] + IllegalControlCharacter { offset: usize }, + + #[error("Illegal primitive value/type; it cut off before we could determine which at {span:?}")] + IllegalPrimitive { span: Range }, + + #[error("Illegal character in primitive ({char:?}) at {span:?}")] + IllegalPrimitiveCharacter { span: Range, char: char }, + + #[error("Unfinished character constant found at {span:?}")] + UnfinishedCharacter { span: Range }, + + #[error("Unfinished string constant found at {span:?}")] + UnfinishedString { span: Range }, + + #[error("Character {char:?} has some extra bits at the end at {span:?}")] + OverlongCharacter { char: char, span: Range }, + + #[error("Unknown escaped character {escaped_char:?} at {span:?}")] + UnknownEscapeCharacter { + escaped_char: char, + span: Range, + }, + + 
#[error("Invalid unicode escape sequence at {span:?}")] + InvalidUnicode { span: Range }, +} + +impl LexerError { + pub fn to_triple(&self) -> (usize, Result, usize) { + match self { + LexerError::IllegalControlCharacter { offset } => (*offset, Err(self.clone()), *offset), + LexerError::IllegalPrimitive { span } => (span.start, Err(self.clone()), span.end), + LexerError::IllegalPrimitiveCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::UnfinishedCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::UnfinishedString { span, .. } => (span.start, Err(self.clone()), span.end), + LexerError::OverlongCharacter { span, .. } => (span.start, Err(self.clone()), span.end), + LexerError::UnknownEscapeCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::InvalidUnicode { span, .. } => (span.start, Err(self.clone()), span.end), + } + } +} + +//impl From for Diagnostic { +// fn from(value: LexerError) -> Self { +// match value { +// LexerError::IllegalControlCharacter { file, offset } => Diagnostic::error() +// .with_code("E1001") +// .with_message("Illegal control character in input stream") +// .with_label(Label::primary(file, offset..offset).with_message("illegal character")), +// +// LexerError::IllegalPrimitive { file, span } => Diagnostic::error() +// .with_code("E1002") +// .with_message("Illegal primitive; it cut off before it could finish") +// .with_label( +// Label::primary(file, span) +// .with_message("should be at least one character after the %"), +// ), +// +// LexerError::IllegalPrimitiveCharacter { file, span, char } => Diagnostic::error() +// .with_code("E1003") +// .with_message(format!("Illegal character {char:?} in primitive")) +// .with_label(Label::primary(file, span).with_message("illegal character")), +// +// LexerError::UnfinishedCharacter { file, span } => Diagnostic::error() +// .with_code("E1004") +// .with_message("Unfinished character in 
input stream.") +// .with_label(Label::primary(file, span).with_message("unfinished character")), +// +// LexerError::UnfinishedString { file, span } => Diagnostic::error() +// .with_code("E1005") +// .with_message("Unfinished string in input stream.") +// .with_label(Label::primary(file, span).with_message("unfinished string")), +// +// LexerError::OverlongCharacter { file, char, span } => Diagnostic::error() +// .with_code("E1006") +// .with_message(format!( +// "Character {char:?} has some extra bits at the end of it." +// )) +// .with_label(Label::primary(file, span).with_message("overlong character")), +// +// LexerError::UnknownEscapeCharacter { +// file, +// escaped_char, +// span, +// } => Diagnostic::error() +// .with_code("E1007") +// .with_message(format!("Unknown escape character {escaped_char:?}.")) +// .with_label(Label::primary(file, span).with_message("unknown character")), +// +// LexerError::InvalidUnicode { file, span } => Diagnostic::error() +// .with_code("E1008") +// .with_message("Unknown or invalid unicode escape sequence.") +// .with_label(Label::primary(file, span).with_message("escape sequence")), +// } +// } +//} diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop new file mode 100644 index 0000000..1bf890e --- /dev/null +++ b/src/syntax/parser.lalrpop @@ -0,0 +1,39 @@ +use crate::syntax::*; +use crate::syntax::error::ParserError; +use crate::syntax::tokens::*; + +grammar(file_id: usize); + +extern { + type Location = usize; + type Error = ParserError; + + enum Token { + "(" => Token::OpenParen, + ")" => Token::CloseParen, + "[" => Token::OpenSquare, + "]" => Token::CloseSquare, + "{" => Token::OpenBrace, + "}" => Token::CloseBrace, + ";" => Token::Semi, + ":" => Token::Colon, + "," => Token::Comma, + "`" => Token::BackTick, + "\\" => Token::Lambda(_), + + "" => Token::TypeName(), + "" => Token::ValueName(), + "" => Token::OperatorName(), + "" => Token::PrimitiveTypeName(), + "" => Token::PrimitiveValueName(), + "" => 
Token::Integer(), + "" => Token::Character(), + "" => Token::String(), + } +} + +pub ConstantValue: ConstantValue = { + "> => ConstantValue::Integer(Location::new(file_id, s..e), x), + "> => ConstantValue::Character(Location::new(file_id, s..e), x), + "> => ConstantValue::String(Location::new(file_id, s..e), x), +} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs new file mode 100644 index 0000000..9399b9e --- /dev/null +++ b/src/syntax/tokens.rs @@ -0,0 +1,609 @@ +use crate::syntax::IntegerWithBase; +use crate::syntax::error::LexerError; +use proptest_derive::Arbitrary; +use std::fmt; +use std::str::CharIndices; + +/// A single token of the input stream; used to help the parsing function over +/// more concrete things than bytes. +/// +/// The [`std::fmt::Display`] implementation is designed to round-trip, so those +/// needing a more regular or descriptive option should consider using the +/// [`std::fmt::Debug`] implementation instead. +#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] +pub enum Token { + OpenParen, + CloseParen, + OpenSquare, + CloseSquare, + OpenBrace, + CloseBrace, + Semi, + Colon, + Comma, + BackTick, + Lambda(bool), + + TypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String), + ValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String), + OperatorName( + #[proptest( + regex = r"[\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|][\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|_]*" + )] + String, + ), + + PrimitiveTypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String), + PrimitiveValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String), + + Integer(IntegerWithBase), + Character(char), + String(String), +} + +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Token::OpenParen => write!(f, "("), + Token::CloseParen => write!(f, ")"), + Token::OpenSquare => write!(f, "["), + Token::CloseSquare => write!(f, "]"), + Token::OpenBrace => write!(f, "{{"), + Token::CloseBrace => write!(f, "}}"), + 
Token::Semi => write!(f, ";"), + Token::Colon => write!(f, ":"), + Token::Comma => write!(f, ","), + Token::BackTick => write!(f, "`"), + Token::Lambda(false) => write!(f, "\\"), + Token::Lambda(true) => write!(f, "λ"), + Token::TypeName(str) => write!(f, "{str}"), + Token::ValueName(str) => write!(f, "{str}"), + Token::OperatorName(str) => write!(f, "{str}"), + Token::PrimitiveTypeName(str) => write!(f, "prim%{str}"), + Token::PrimitiveValueName(str) => write!(f, "prim%{str}"), + Token::Integer(IntegerWithBase { base, value }) => match base { + None => write!(f, "{value}"), + Some(2) => write!(f, "0b{value:b}"), + Some(8) => write!(f, "0o{value:o}"), + Some(10) => write!(f, "0d{value}"), + Some(16) => write!(f, "0x{value:x}"), + Some(base) => write!(f, ""), + }, + Token::Character(c) => write!(f, "{c:?}"), + Token::String(s) => write!(f, "{s:?}"), + } + } +} + +#[allow(private_interfaces)] +pub enum Lexer<'a> { + Working(LexerState<'a>), + Errored(LexerError), + Done(usize), +} + +struct LexerState<'a> { + stream: CharIndices<'a>, + buffer: Option<(usize, char)>, +} + +impl<'a> From<&'a str> for Lexer<'a> { + fn from(value: &'a str) -> Self { + println!("LEXING '{value}'"); + Lexer::Working(LexerState { + stream: value.char_indices(), + buffer: None, + }) + } +} + +impl<'a> Lexer<'a> { + pub fn new(stream: &'a str) -> Self { + Lexer::Working(LexerState { + stream: stream.char_indices(), + buffer: None, + }) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Result<(usize, Token, usize), LexerError>; + + fn next(&mut self) -> Option { + match self { + Lexer::Done(_) => None, + Lexer::Errored(e) => Some(Err(e.clone())), + Lexer::Working(state) => match state.next_token() { + Err(e) => { + println!("ERROR: {e}"); + *self = Lexer::Errored(e.clone()); + Some(Err(e)) + } + + Ok(None) => { + println!("LEXER DONE"); + *self = Lexer::Done(state.stream.offset()); + None + } + + Ok(Some((start, token, end))) => { + println!("TOKEN: {:?}", token); + Some(Ok((start, 
token, end))) + } + }, + } + } +} + +impl<'a> LexerState<'a> { + fn next_char(&mut self) -> Option<(usize, char)> { + let result = self.buffer.take().or_else(|| self.stream.next()); + println!("next_char() -> {result:?}"); + result + } + + fn stash_char(&mut self, idx: usize, c: char) { + println!("stash_char({idx}, {c})"); + assert!(self.buffer.is_none()); + self.buffer = Some((idx, c)); + } + + fn next_token(&mut self) -> Result, LexerError> { + while let Some((token_start_offset, char)) = self.next_char() { + if char.is_whitespace() { + continue; + } + + let simple_response = + |token| Ok(Some((token_start_offset, token, self.stream.offset()))); + + match char { + '(' => return simple_response(Token::OpenParen), + ')' => return simple_response(Token::CloseParen), + '[' => return simple_response(Token::OpenSquare), + ']' => return simple_response(Token::CloseSquare), + '{' => return simple_response(Token::OpenBrace), + '}' => return simple_response(Token::CloseBrace), + ';' => return simple_response(Token::Semi), + ':' => return simple_response(Token::Colon), + ',' => return simple_response(Token::Comma), + '`' => return simple_response(Token::BackTick), + '\\' => return simple_response(Token::Lambda(false)), + 'λ' => return simple_response(Token::Lambda(true)), + + '0' => return self.starts_with_zero(token_start_offset), + '\'' => return self.starts_with_single(token_start_offset), + '\"' => return self.starts_with_double(token_start_offset), + _ => {} + } + + if let Some(value) = char.to_digit(10) { + return self.parse_integer(token_start_offset, 10, None, value as u64); + } + + if char.is_uppercase() { + return self.parse_identifier( + token_start_offset, + char.into(), + |c| c.is_alphanumeric() || c == '_', + Token::TypeName, + ); + } + + if char.is_alphabetic() || char == '_' { + return self.parse_identifier( + token_start_offset, + char.into(), + |c| c.is_alphanumeric() || c == '_', + Token::ValueName, + ); + } + + if !char.is_alphanumeric() && 
!char.is_whitespace() && !char.is_control() { + return self.parse_identifier( + token_start_offset, + char.into(), + |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(), + Token::OperatorName, + ); + } + } + + Ok(None) + } + + fn starts_with_zero( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + match self.next_char() { + None => { + let token = Token::Integer(IntegerWithBase { + base: None, + value: 0, + }); + Ok(Some((token_start_offset, token, self.stream.offset()))) + } + + Some((_, 'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0), + Some((_, 'o')) => self.parse_integer(token_start_offset, 8, Some(8), 0), + Some((_, 'd')) => self.parse_integer(token_start_offset, 10, Some(10), 0), + Some((_, 'x')) => self.parse_integer(token_start_offset, 16, Some(16), 0), + + Some((offset, c)) => { + if let Some(value) = c.to_digit(10) { + self.parse_integer(token_start_offset, 10, None, value as u64) + } else { + self.stash_char(offset, c); + let token = Token::Integer(IntegerWithBase { + base: None, + value: 0, + }); + Ok(Some((token_start_offset, token, offset))) + } + } + } + } + + fn parse_integer( + &mut self, + token_start_offset: usize, + base: u32, + provided_base: Option, + mut value: u64, + ) -> Result, LexerError> { + let mut end_offset = self.stream.offset(); + + while let Some((offset, c)) = self.next_char() { + end_offset = offset; + if let Some(digit) = c.to_digit(base) { + value = (value * (base as u64)) + (digit as u64); + } else { + self.stash_char(offset, c); + break; + } + } + + let token = Token::Integer(IntegerWithBase { + base: provided_base, + value, + }); + + Ok(Some((token_start_offset, token, end_offset))) + } + + fn parse_identifier( + &mut self, + token_start_offset: usize, + mut identifier: String, + mut allowed_character: fn(char) -> bool, + mut builder: fn(String) -> Token, + ) -> Result, LexerError> { + let mut end_offset = self.stream.offset(); + + while let Some((offset, c)) = 
self.next_char() { + end_offset = offset; + + if allowed_character(c) { + identifier.push(c); + } else if identifier == "prim" && c == '%' { + identifier = String::new(); + allowed_character = |c| c.is_alphanumeric() || c == '_'; + match self.next_char() { + None => { + return Err(LexerError::IllegalPrimitive { + span: token_start_offset..end_offset, + }); + } + + Some((_, char)) => { + if char.is_uppercase() { + identifier.push(char); + builder = Token::PrimitiveTypeName; + } else if char.is_lowercase() || char == '_' { + identifier.push(char); + builder = Token::PrimitiveValueName; + } else { + return Err(LexerError::IllegalPrimitiveCharacter { + span: token_start_offset..end_offset, + char, + }); + } + } + } + } else { + self.stash_char(offset, c); + break; + } + } + + Ok(Some((token_start_offset, builder(identifier), end_offset))) + } + + fn starts_with_single( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + let Some((_, mut char)) = self.next_char() else { + return Err(LexerError::UnfinishedCharacter { + span: token_start_offset..self.stream.offset(), + }); + }; + + if char == '\\' { + char = self.get_escaped_character(token_start_offset)?; + } + + let Some((idx, finish_char)) = self.next_char() else { + return Err(LexerError::UnfinishedCharacter { + span: token_start_offset..self.stream.offset(), + }); + }; + + if finish_char != '\'' { + return Err(LexerError::OverlongCharacter { + char, + span: token_start_offset..self.stream.offset(), + }); + } + + Ok(Some((token_start_offset, Token::Character(char), idx))) + } + + fn get_escaped_character(&mut self, token_start_offset: usize) -> Result { + let Some((idx, escaped_char)) = self.next_char() else { + return Err(LexerError::UnfinishedCharacter { + span: token_start_offset..self.stream.offset(), + }); + }; + + match escaped_char { + '0' => Ok('\0'), + 'a' => Ok('\u{0007}'), + 'b' => Ok('\u{0008}'), + 'f' => Ok('\u{000C}'), + 'n' => Ok('\n'), + 'r' => Ok('\r'), + 't' => Ok('\t'), + 'u' => 
self.get_unicode_sequence(idx), + 'v' => Ok('\u{000B}'), + '\'' => Ok('\''), + '"' => Ok('"'), + '\\' => Ok('\\'), + _ => Err(LexerError::UnknownEscapeCharacter { + escaped_char, + span: idx..self.stream.offset(), + }), + } + } + + fn get_unicode_sequence(&mut self, token_start_offset: usize) -> Result { + let Some((_, char)) = self.next_char() else { + return Err(LexerError::InvalidUnicode { + span: token_start_offset..self.stream.offset(), + }); + }; + + if char != '{' { + return Err(LexerError::InvalidUnicode { + span: token_start_offset..self.stream.offset(), + }); + } + + let mut value = 0; + + while let Some((idx, char)) = self.next_char() { + if let Some(digit) = char.to_digit(16) { + value = (value * 16) + digit; + continue; + } + + if char == '}' { + if let Some(char) = char::from_u32(value) { + return Ok(char); + } else { + return Err(LexerError::InvalidUnicode { + span: token_start_offset..idx, + }); + } + } + + return Err(LexerError::InvalidUnicode { + span: token_start_offset..self.stream.offset(), + }); + } + + Err(LexerError::InvalidUnicode { + span: token_start_offset..self.stream.offset(), + }) + } + + fn starts_with_double( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + let mut result = String::new(); + + while let Some((idx, char)) = self.next_char() { + match char { + '"' => return Ok(Some((token_start_offset, Token::String(result), idx))), + + '\\' => result.push(self.get_escaped_character(idx)?), + + _ => result.push(char), + } + } + + Err(LexerError::UnfinishedString { + span: token_start_offset..self.stream.offset(), + }) + } +} + +proptest::proptest! 
{ + #[test] + fn token_string_token(token: Token) { + println!("Starting from {token:?}"); + let string = format!("{token}"); + let mut tokens = Lexer::from(string.as_str()); + let initial_token = tokens.next() + .expect("Can get a token without an error.") + .expect("Can get a valid token.") + .1; + + proptest::prop_assert_eq!(token, initial_token); + proptest::prop_assert!(tokens.next().is_none()); + } +} + +#[cfg(test)] +fn parsed_single_token(s: &str) -> Token { + let mut tokens = Lexer::from(s); + let result = tokens + .next() + .expect(format!("Can get at least one token from {s:?}").as_str()) + .expect("Can get a valid token.") + .1; + + assert!( + tokens.next().is_none(), + "Should only get one token from {s:?}" + ); + + result +} + +#[test] +fn numbers_work_as_expected() { + assert_eq!( + Token::Integer(IntegerWithBase { + base: None, + value: 1 + }), + parsed_single_token("1") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(2), + value: 1 + }), + parsed_single_token("0b1") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(8), + value: 1 + }), + parsed_single_token("0o1") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(10), + value: 1 + }), + parsed_single_token("0d1") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(16), + value: 1 + }), + parsed_single_token("0x1") + ); + + assert_eq!( + Token::Integer(IntegerWithBase { + base: None, + value: 10 + }), + parsed_single_token("10") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(2), + value: 2 + }), + parsed_single_token("0b10") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(8), + value: 8 + }), + parsed_single_token("0o10") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(10), + value: 10 + }), + parsed_single_token("0d10") + ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: Some(16), + value: 16 + }), + parsed_single_token("0x10") + ); +} + +#[test] +fn lambda_works() 
{ + assert_eq!(Token::Lambda(false), parsed_single_token("\\")); + assert_eq!(Token::Lambda(true), parsed_single_token("λ")); + assert_eq!(Token::TypeName("Λ".into()), parsed_single_token("Λ")); +} + +#[test] +fn types_work_as_expected() { + assert_eq!(Token::TypeName("Int".into()), parsed_single_token("Int")); + assert_eq!(Token::TypeName("Int8".into()), parsed_single_token("Int8")); + assert_eq!(Token::TypeName("Γ".into()), parsed_single_token("Γ")); +} + +#[test] +fn values_work_as_expected() { + assert_eq!( + Token::ValueName("alpha".into()), + parsed_single_token("alpha") + ); + assert_eq!(Token::ValueName("ɑ".into()), parsed_single_token("ɑ")); +} + +#[test] +fn operators_work_as_expected() { + assert_eq!(Token::OperatorName("-".into()), parsed_single_token("-")); + assert_eq!(Token::OperatorName("+".into()), parsed_single_token("+")); + assert_eq!(Token::OperatorName("*".into()), parsed_single_token("*")); + assert_eq!(Token::OperatorName("/".into()), parsed_single_token("/")); + assert_eq!(Token::OperatorName("↣".into()), parsed_single_token("↣")); +} + +#[test] +fn can_separate_pieces() { + let mut lexer = Lexer::from("a-b"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + + assert_eq!(Some(Token::ValueName("a".into())), next_token()); + assert_eq!(Some(Token::OperatorName("-".into())), next_token()); + assert_eq!(Some(Token::ValueName("b".into())), next_token()); + assert_eq!(None, next_token()); + + let mut lexer = Lexer::from("a--b"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + + assert_eq!(Some(Token::ValueName("a".into())), next_token()); + assert_eq!(Some(Token::OperatorName("--".into())), next_token()); + assert_eq!(Some(Token::ValueName("b".into())), next_token()); + assert_eq!(None, next_token()); + + let mut lexer = Lexer::from("a - -b"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + + 
assert_eq!(Some(Token::ValueName("a".into())), next_token()); + assert_eq!(Some(Token::OperatorName("-".into())), next_token()); + assert_eq!(Some(Token::OperatorName("-".into())), next_token()); + assert_eq!(Some(Token::ValueName("b".into())), next_token()); + assert_eq!(None, next_token()); +} -- 2.53.0 From 8e6ac7ecbdf1ff09bc5daeab907652df0dee9ac2 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 6 Sep 2025 20:39:52 -0700 Subject: [PATCH 02/33] Ignore aider droppings. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 85f98e5..9c98879 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ bang # Added by cargo /target +.aider* -- 2.53.0 From 768b27a8f6e6ff1b10f337589be84179ee721c7f Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 6 Sep 2025 20:40:18 -0700 Subject: [PATCH 03/33] Meh. Type parsing. --- src/syntax/parser.lalrpop | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop index 1bf890e..5d3936f 100644 --- a/src/syntax/parser.lalrpop +++ b/src/syntax/parser.lalrpop @@ -20,6 +20,7 @@ extern { "," => Token::Comma, "`" => Token::BackTick, "\\" => Token::Lambda(_), + "->" => Token::Arrow, "" => Token::TypeName(), "" => Token::ValueName(), @@ -32,6 +33,36 @@ extern { } } +pub Type: Type = { + FunctionType, +} + +FunctionType: Type = { + TypeApplication, + "->" => + Type::Function(Box::new(argtype), Box::new(ret)), +} + +TypeApplication: Type = { + BaseType, + "> => { + let constructor = Type::Constructor(Location::new(file_id, s..e), c); + Type::Application(Box::new(constructor), arguments) + }, + "> => { + let constructor = Type::Constructor(Location::new(file_id, s..e), c); + Type::Application(Box::new(constructor), arguments) + }, +} + +BaseType: Type = { + "> => + Type::Variable(Location::new(file_id, s..e), v), + "> => + Type::Primitive(Location::new(file_id, s..e), p), + "(" ")" => t, +} + pub ConstantValue: ConstantValue 
= { "> => ConstantValue::Integer(Location::new(file_id, s..e), x), "> => ConstantValue::Character(Location::new(file_id, s..e), x), -- 2.53.0 From 129bf3c204aea0763534dd539507ff3dc2815672 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 6 Sep 2025 20:40:57 -0700 Subject: [PATCH 04/33] Add a separate arrow token. --- src/syntax/tokens.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 9399b9e..c9cd846 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -22,13 +22,15 @@ pub enum Token { Colon, Comma, BackTick, + Arrow, Lambda(bool), TypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String), ValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String), OperatorName( #[proptest( - regex = r"[\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|][\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|_]*" + regex = r"[\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|][\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|_]*", + filter = "|x| x != \"->\"" )] String, ), @@ -54,6 +56,7 @@ impl fmt::Display for Token { Token::Colon => write!(f, ":"), Token::Comma => write!(f, ","), Token::BackTick => write!(f, "`"), + Token::Arrow => write!(f, "->"), Token::Lambda(false) => write!(f, "\\"), Token::Lambda(true) => write!(f, "λ"), Token::TypeName(str) => write!(f, "{str}"), @@ -174,6 +177,7 @@ impl<'a> LexerState<'a> { '0' => return self.starts_with_zero(token_start_offset), '\'' => return self.starts_with_single(token_start_offset), '\"' => return self.starts_with_double(token_start_offset), + '-' => return self.starts_with_dash(token_start_offset), _ => {} } @@ -438,6 +442,27 @@ impl<'a> LexerState<'a> { span: token_start_offset..self.stream.offset(), }) } + + fn starts_with_dash( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + match self.next_char() { + None => Ok(Some((token_start_offset, Token::OperatorName("-".into()), token_start_offset))), + Some((end, '>')) => Ok(Some((token_start_offset, Token::Arrow, end))), 
+ Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => + self.parse_identifier( + token_start_offset, + format!("-{c}"), + |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(), + Token::OperatorName, + ), + Some((idx, c)) => { + self.stash_char(idx, c); + Ok(Some((token_start_offset, Token::OperatorName("-".into()), idx))) + } + } + } } proptest::proptest! { -- 2.53.0 From 1baeae1bf06e60b92ca3adba74cc3bf15823f739 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 6 Sep 2025 20:41:36 -0700 Subject: [PATCH 05/33] Some parsing test cases. --- src/syntax.rs | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/src/syntax.rs b/src/syntax.rs index a4701b6..3b40648 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -48,10 +48,12 @@ impl Location { } } +#[derive(Debug)] pub struct Module { definitions: Vec, } +#[derive(Debug)] pub struct Definition { location: Location, export: ExportClass, @@ -59,6 +61,7 @@ pub struct Definition { definition: Def, } +#[derive(Debug)] pub enum Def { Enumeration(EnumerationDef), Structure(StructureDef), @@ -77,28 +80,33 @@ impl Def { } } +#[derive(Debug)] pub struct EnumerationDef { location: Location, options: Vec, } +#[derive(Debug)] pub struct EnumerationVariant { location: Location, name: String, arguments: Vec, } +#[derive(Debug)] pub struct StructureDef { name: String, location: Location, fields: Vec, } +#[derive(Debug)] pub struct StructureField { name: String, field_type: Type, } +#[derive(Debug)] pub struct FunctionDef { name: String, location: Location, @@ -107,26 +115,31 @@ pub struct FunctionDef { body: Vec, } +#[derive(Debug)] pub struct FunctionArg { name: String, arg_type: Option, } +#[derive(Debug)] pub struct ValueDef { name: String, location: Location, value: Value, } +#[derive(Debug)] pub enum ExportClass { Public, Private, } +#[derive(Debug)] pub enum Statement { Binding(BindingStmt), } +#[derive(Debug)] pub struct 
BindingStmt { location: Location, mutable: bool, @@ -134,10 +147,12 @@ pub struct BindingStmt { value: Expression, } +#[derive(Debug)] pub enum Expression { Value(Value), } +#[derive(Debug)] pub struct TypeRestrictions { restrictions: Vec, } @@ -150,24 +165,28 @@ impl TypeRestrictions { } } +#[derive(Debug)] pub struct TypeRestriction { location: Location, class: String, variables: Vec, } +#[derive(Debug)] pub enum Type { Constructor(Location, String), Variable(Location, String), Primitive(Location, String), Application(Box, Vec), - Function(Vec, Box), + Function(Box, Box), } +#[derive(Debug)] pub enum Value { Constant(ConstantValue), } +#[derive(Debug)] pub enum ConstantValue { Integer(Location, IntegerWithBase), Character(Location, char), @@ -249,3 +268,58 @@ fn can_parse_constants() { Ok(ConstantValue::Character(_, 'f')) )); } + +#[test] +fn can_parse_types() { + let parse_type = |str| { + let lexer = Lexer::from(str).map(|item| { + item.map_err(|e| ParserError::LexerError { + file_id: 0, + error: e, + }) + }); + let result = TypeParser::new().parse(0, lexer); + result + }; + + println!("cons result: {:?}", parse_type("Cons")); + assert!(matches!( + parse_type("Cons"), + Ok(Type::Application(cons, empty)) if + matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && + empty.is_empty() + )); + assert!(matches!( + parse_type("cons"), + Ok(Type::Variable(_, c)) if c == "cons" + )); + assert!(matches!( + parse_type("Cons a b"), + Ok(Type::Application(a, b)) + if matches!(a.as_ref(), Type::Constructor(_, c) if c == "Cons") && + matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] + if b1 == "a" && b2 == "b") + )); + assert!(matches!( + parse_type("a -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + )); + assert!(matches!( + parse_type("a b -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1), 
Type::Variable(_, b1)] + if a1 == "a" && b1 == "b") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + )); + assert!(matches!( + parse_type("Cons a b -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Application(cons, appargs)] + if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && + matches!(appargs.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] + if b1 == "a" && b2 == "b")) && + matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + )); +} -- 2.53.0 From e250a4970345fb799f6094caf8533317a947be77 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 6 Sep 2025 20:42:25 -0700 Subject: [PATCH 06/33] Cargo.lock --- Cargo.toml | 5 ----- src/syntax.rs | 7 ------- 2 files changed, 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1848442..e994877 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,11 +6,6 @@ edition = "2024" [dependencies] codespan = "0.12.0" codespan-reporting = "0.12.0" -lalrpop-util = "0.20.2" -logos = "0.15.1" proptest = "1.7.0" proptest-derive = "0.6.0" thiserror = "2.0.12" - -[build-dependencies] -lalrpop = "0.20.2" diff --git a/src/syntax.rs b/src/syntax.rs index 3b40648..12ebc45 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,11 +1,4 @@ -use lalrpop_util::lalrpop_mod; - mod error; -lalrpop_mod!( - #[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)] - parser, - "/syntax/parser.rs" -); pub mod tokens; #[cfg(test)] -- 2.53.0 From 8657c009c89aaac945249864e60c854a480d775d Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 6 Sep 2025 20:42:30 -0700 Subject: [PATCH 07/33] de-lalrpop --- build.rs | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 build.rs diff --git a/build.rs b/build.rs deleted file mode 100644 index 23c7d3f..0000000 --- a/build.rs +++ /dev/null @@ -1,5 +0,0 @@ -extern crate lalrpop; - -fn main() { - lalrpop::process_root().unwrap(); -} -- 2.53.0 From 24e6bf6318f8fdcfd16bea5f93b08f1af5e09b27 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: 
Sat, 6 Sep 2025 22:06:21 -0700 Subject: [PATCH 08/33] Start with hand writing the parser again. --- Cargo.lock | 493 +------------------------------------------ src/syntax.rs | 34 ++- src/syntax/error.rs | 11 + src/syntax/parse.rs | 172 +++++++++++++++ src/syntax/tokens.rs | 101 ++++++--- 5 files changed, 271 insertions(+), 540 deletions(-) create mode 100644 src/syntax/parse.rs diff --git a/Cargo.lock b/Cargo.lock index f08dbe5..38cd685 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,24 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "ascii-canvas" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" -dependencies = [ - "term", -] - [[package]] name = "autocfg" version = "1.5.0" @@ -32,27 +14,9 @@ version = "0.1.0" dependencies = [ "codespan", "codespan-reporting", - "lalrpop", - "lalrpop-util", - "logos", "proptest", "proptest-derive", - "thiserror 2.0.14", -] - -[[package]] -name = "beef" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" - -[[package]] -name = "bit-set" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" -dependencies = [ - "bit-vec 0.6.3", + "thiserror", ] [[package]] @@ -61,15 +25,9 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "bit-vec 0.8.0", + "bit-vec", ] -[[package]] -name = "bit-vec" -version = "0.6.3" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bit-vec" version = "0.8.0" @@ -109,54 +67,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "dirs-next" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" -dependencies = [ - "cfg-if", - "dirs-sys-next", -] - -[[package]] -name = "dirs-sys-next" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" -dependencies = [ - "libc", - "redox_users", - "winapi", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "ena" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1" -dependencies = [ - "log", -] - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - [[package]] name = "errno" version = "0.3.13" @@ -173,29 +83,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "getrandom" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", -] - [[package]] name = "getrandom" version = "0.3.3" @@ -205,63 +98,7 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", -] - -[[package]] -name = "hashbrown" -version = "0.15.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" - -[[package]] -name = "indexmap" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "lalrpop" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca" -dependencies = [ - "ascii-canvas", - "bit-set 0.5.3", - "ena", - "itertools", - "lalrpop-util", - "petgraph", - "pico-args", - "regex", - "regex-syntax", - "string_cache", - "term", - "tiny-keccak", - "unicode-xid", - "walkdir", -] - -[[package]] -name = "lalrpop-util" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" 
-dependencies = [ - "regex-automata", + "wasi", ] [[package]] @@ -276,84 +113,12 @@ version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" -[[package]] -name = "libredox" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" -dependencies = [ - "bitflags", - "libc", -] - [[package]] name = "linux-raw-sys" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" -[[package]] -name = "lock_api" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" - -[[package]] -name = "logos" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" -dependencies = [ - "logos-derive", -] - -[[package]] -name = "logos-codegen" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" -dependencies = [ - "beef", - "fnv", - "lazy_static", - "proc-macro2", - "quote", - "regex-syntax", - "rustc_version", - "syn", -] - -[[package]] -name = "logos-derive" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" -dependencies = [ - "logos-codegen", -] - -[[package]] -name = "memchr" -version = "2.7.5" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" - -[[package]] -name = "new_debug_unreachable" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" - [[package]] name = "num-traits" version = "0.2.19" @@ -369,54 +134,6 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -[[package]] -name = "parking_lot" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.52.6", -] - -[[package]] -name = "petgraph" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" -dependencies = [ - "fixedbitset", - "indexmap", -] - -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pico-args" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -426,12 +143,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "precomputed-hash" -version = 
"0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - [[package]] name = "proc-macro2" version = "1.0.97" @@ -447,8 +158,8 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" dependencies = [ - "bit-set 0.8.0", - "bit-vec 0.8.0", + "bit-set", + "bit-vec", "bitflags", "lazy_static", "num-traits", @@ -519,7 +230,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom", ] [[package]] @@ -531,64 +242,12 @@ dependencies = [ "rand_core", ] -[[package]] -name = "redox_syscall" -version = "0.5.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" -dependencies = [ - "bitflags", -] - -[[package]] -name = "redox_users" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" -dependencies = [ - "getrandom 0.2.16", - "libredox", - "thiserror 1.0.57", -] - -[[package]] -name = "regex" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - [[package]] name = "regex-syntax" version = "0.8.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" -[[package]] -name = "rustc_version" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" -dependencies = [ - "semver", -] - [[package]] name = "rustix" version = "1.0.8" @@ -602,12 +261,6 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "rustversion" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" - [[package]] name = "rusty-fork" version = "0.3.0" @@ -620,27 +273,6 @@ dependencies = [ "wait-timeout", ] -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" - [[package]] name = "serde" version = "1.0.219" @@ -661,31 +293,6 @@ dependencies = [ "syn", ] -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "string_cache" -version = "0.8.7" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" -dependencies = [ - "new_debug_unreachable", - "once_cell", - "parking_lot", - "phf_shared", - "precomputed-hash", -] - [[package]] name = "syn" version = "2.0.104" @@ -704,23 +311,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom", "once_cell", "rustix", "windows-sys 0.59.0", ] -[[package]] -name = "term" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" -dependencies = [ - "dirs-next", - "rustversion", - "winapi", -] - [[package]] name = "termcolor" version = "1.4.1" @@ -730,33 +326,13 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "thiserror" -version = "1.0.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" -dependencies = [ - "thiserror-impl 1.0.57", -] - [[package]] name = "thiserror" version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" dependencies = [ - "thiserror-impl 2.0.14", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "thiserror-impl", ] [[package]] @@ -770,15 +346,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", 
-] - [[package]] name = "unarray" version = "0.1.4" @@ -797,12 +364,6 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" -[[package]] -name = "unicode-xid" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" - [[package]] name = "wait-timeout" version = "0.2.1" @@ -812,22 +373,6 @@ dependencies = [ "libc", ] -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" @@ -837,22 +382,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.9" @@ -862,12 +391,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.59.0" 
diff --git a/src/syntax.rs b/src/syntax.rs index 12ebc45..dc3f7c4 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,10 +1,10 @@ mod error; +mod parse; pub mod tokens; #[cfg(test)] use crate::syntax::error::ParserError; -#[cfg(test)] -use crate::syntax::parser::*; +use crate::syntax::parse::Parser; #[cfg(test)] use crate::syntax::tokens::Lexer; use codespan_reporting::diagnostic::Label; @@ -171,7 +171,7 @@ pub enum Type { Variable(Location, String), Primitive(Location, String), Application(Box, Vec), - Function(Box, Box), + Function(Vec, Box), } #[derive(Debug)] @@ -202,14 +202,9 @@ pub struct IntegerWithBase { #[test] fn can_parse_constants() { let parse_constant = |str| { - let lexer = Lexer::from(str).map(|item| { - item.map_err(|e| ParserError::LexerError { - file_id: 0, - error: e, - }) - }); - let result = ConstantValueParser::new().parse(0, lexer); - result + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_constant() }; assert!(matches!( @@ -265,20 +260,14 @@ fn can_parse_constants() { #[test] fn can_parse_types() { let parse_type = |str| { - let lexer = Lexer::from(str).map(|item| { - item.map_err(|e| ParserError::LexerError { - file_id: 0, - error: e, - }) - }); - let result = TypeParser::new().parse(0, lexer); - result + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_type() }; - println!("cons result: {:?}", parse_type("Cons")); assert!(matches!( parse_type("Cons"), - Ok(Type::Application(cons, empty)) if + Ok(Type::Application(cons, empty)) if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && empty.is_empty() )); @@ -293,6 +282,9 @@ fn can_parse_types() { matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] if b1 == "a" && b2 == "b") )); + println!("------"); + println!("result: {:?}", parse_type("a -> z")); + println!("------"); assert!(matches!( parse_type("a -> z"), Ok(Type::Function(a, z)) diff --git a/src/syntax/error.rs 
b/src/syntax/error.rs index eccef47..35924b9 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -7,6 +7,17 @@ use thiserror::Error; pub enum ParserError { #[error("Lexer error at {file_id}: {error}")] LexerError { file_id: usize, error: LexerError }, + + #[error("Unacceptable end of file at {file_id} while {place}")] + UnacceptableEof { file_id: usize, place: &'static str }, + + #[error("Unexpected token at {file_id}: expected {expected}, saw {token}")] + UnexpectedToken { + file_id: usize, + span: Range, + token: Token, + expected: &'static str, + }, } #[derive(Clone, Debug, Error, PartialEq)] diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs new file mode 100644 index 0000000..983bde7 --- /dev/null +++ b/src/syntax/parse.rs @@ -0,0 +1,172 @@ +use crate::syntax::error::ParserError; +use crate::syntax::tokens::{Lexer, LocatedToken, Token}; +use crate::syntax::*; + +pub struct Parser<'a> { + file_id: usize, + lexer: Lexer<'a>, + known_tokens: Vec, +} + +impl<'a> Parser<'a> { + pub fn new(file_id: usize, lexer: Lexer<'a>) -> Parser<'a> { + Parser { + file_id, + lexer, + known_tokens: vec![], + } + } + + /// Get the next token. 
+ pub fn next(&mut self) -> Result, ParserError> { + let result = self.known_tokens.pop(); + + if result.is_some() { + Ok(result) + } else { + self.lexer + .next() + .transpose() + .map_err(|error| ParserError::LexerError { + file_id: self.file_id, + error, + }) + } + } + + fn save(&mut self, token: LocatedToken) { + self.known_tokens.push(token) + } + + fn bad_eof(&mut self, place: &'static str) -> ParserError { + ParserError::UnacceptableEof { + file_id: self.file_id, + place, + } + } + + fn to_location(&self, span: Range) -> Location { + Location { + file_id: self.file_id, + span, + } + } + + pub fn parse_type(&mut self) -> Result { + self.parse_function_type() + } + + fn parse_function_type(&mut self) -> Result { + let mut args = Vec::new(); + + while let Ok(t) = self.parse_type_application() { + println!("got argument type: {t:?}"); + args.push(t); + } + + let Some(maybe_arrow) = self.next()? else { + println!("no arrow token"); + match args.pop() { + None => { + return Err(ParserError::UnacceptableEof { + file_id: self.file_id, + place: "parsing function type or type", + }); + } + + Some(t) if args.len() == 0 => return Ok(t), + + Some(_) => { + return Err(ParserError::UnacceptableEof { + file_id: self.file_id, + place: "looking for '->' in function type", + }); + } + } + }; + + if maybe_arrow.token == Token::Arrow { + println!("found function arrow"); + let right = self.parse_function_type()?; + Ok(Type::Function(args, Box::new(right))) + } else if args.len() == 1 { + println!("found non function arrow token {}", maybe_arrow.token); + Ok(args.pop().expect("length = 1 works")) + } else { + self.save(maybe_arrow.clone()); + let LocatedToken { token, span } = maybe_arrow; + + Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span, + token, + expected: "'->' in function type", + }) + } + } + + fn parse_type_application(&mut self) -> Result { + let LocatedToken { token, span } = + self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; + + let 
constructor = match token { + Token::TypeName(x) => Type::Constructor(self.to_location(span), x), + Token::PrimitiveTypeName(x) => Type::Primitive(self.to_location(span), x), + _ => { + println!("saving {token}"); + self.save(LocatedToken { token, span }); + return self.parse_base_type(); + } + }; + + let mut args = vec![]; + + while let Ok(next_arg) = self.parse_base_type() { + args.push(next_arg); + } + + Ok(Type::Application(Box::new(constructor), args)) + } + + fn parse_base_type(&mut self) -> Result { + let LocatedToken { token, span } = + self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; + + match token { + Token::TypeName(x) => Ok(Type::Constructor(self.to_location(span), x)), + Token::PrimitiveTypeName(x) => Ok(Type::Primitive(self.to_location(span), x)), + Token::ValueName(x) => Ok(Type::Variable(self.to_location(span), x)), + token => { + self.save(LocatedToken { + token: token.clone(), + span: span.clone(), + }); + + Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span, + token, + expected: "type constructor, type variable, or primitive type", + }) + } + } + } + + pub fn parse_constant(&mut self) -> Result { + let LocatedToken { token, span } = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for a constant"))?; + + match token { + Token::Integer(iwb) => Ok(ConstantValue::Integer(self.to_location(span), iwb)), + Token::Character(c) => Ok(ConstantValue::Character(self.to_location(span), c)), + Token::String(s) => Ok(ConstantValue::String(self.to_location(span), s)), + _ => Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span, + token, + expected: "constant value", + }), + } + } +} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index c9cd846..08f5885 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -2,8 +2,15 @@ use crate::syntax::IntegerWithBase; use crate::syntax::error::LexerError; use proptest_derive::Arbitrary; use std::fmt; +use std::ops::Range; use std::str::CharIndices; +#[derive(Clone)] +pub struct LocatedToken { + pub token: Token, + pub span: Range, +} + /// A single token of the input stream; used to help the parsing function over /// more concrete things than bytes. /// @@ -92,7 +99,6 @@ struct LexerState<'a> { impl<'a> From<&'a str> for Lexer<'a> { fn from(value: &'a str) -> Self { - println!("LEXING '{value}'"); Lexer::Working(LexerState { stream: value.char_indices(), buffer: None, @@ -110,7 +116,7 @@ impl<'a> Lexer<'a> { } impl<'a> Iterator for Lexer<'a> { - type Item = Result<(usize, Token, usize), LexerError>; + type Item = Result; fn next(&mut self) -> Option { match self { @@ -124,15 +130,11 @@ impl<'a> Iterator for Lexer<'a> { } Ok(None) => { - println!("LEXER DONE"); *self = Lexer::Done(state.stream.offset()); None } - Ok(Some((start, token, end))) => { - println!("TOKEN: {:?}", token); - Some(Ok((start, token, end))) - } + Ok(Some(ltoken)) => Some(Ok(ltoken)), }, } } @@ -141,24 +143,26 @@ impl<'a> Iterator for Lexer<'a> { impl<'a> LexerState<'a> { fn next_char(&mut self) -> Option<(usize, char)> { let result = self.buffer.take().or_else(|| self.stream.next()); - println!("next_char() -> {result:?}"); result } fn stash_char(&mut self, idx: usize, c: char) 
{ - println!("stash_char({idx}, {c})"); assert!(self.buffer.is_none()); self.buffer = Some((idx, c)); } - fn next_token(&mut self) -> Result, LexerError> { + fn next_token(&mut self) -> Result, LexerError> { while let Some((token_start_offset, char)) = self.next_char() { if char.is_whitespace() { continue; } - let simple_response = - |token| Ok(Some((token_start_offset, token, self.stream.offset()))); + let simple_response = |token| { + Ok(Some(LocatedToken { + token, + span: token_start_offset..self.stream.offset(), + })) + }; match char { '(' => return simple_response(Token::OpenParen), @@ -219,14 +223,17 @@ impl<'a> LexerState<'a> { fn starts_with_zero( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { match self.next_char() { None => { let token = Token::Integer(IntegerWithBase { base: None, value: 0, }); - Ok(Some((token_start_offset, token, self.stream.offset()))) + Ok(Some(LocatedToken { + token, + span: token_start_offset..self.stream.offset(), + })) } Some((_, 'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0), @@ -243,7 +250,10 @@ impl<'a> LexerState<'a> { base: None, value: 0, }); - Ok(Some((token_start_offset, token, offset))) + Ok(Some(LocatedToken { + token, + span: token_start_offset..offset, + })) } } } @@ -255,7 +265,7 @@ impl<'a> LexerState<'a> { base: u32, provided_base: Option, mut value: u64, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let mut end_offset = self.stream.offset(); while let Some((offset, c)) = self.next_char() { @@ -273,7 +283,10 @@ impl<'a> LexerState<'a> { value, }); - Ok(Some((token_start_offset, token, end_offset))) + Ok(Some(LocatedToken { + token, + span: token_start_offset..end_offset, + })) } fn parse_identifier( @@ -282,7 +295,7 @@ impl<'a> LexerState<'a> { mut identifier: String, mut allowed_character: fn(char) -> bool, mut builder: fn(String) -> Token, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let mut end_offset = 
self.stream.offset(); while let Some((offset, c)) = self.next_char() { @@ -321,13 +334,16 @@ impl<'a> LexerState<'a> { } } - Ok(Some((token_start_offset, builder(identifier), end_offset))) + Ok(Some(LocatedToken { + token: builder(identifier), + span: token_start_offset..end_offset, + })) } fn starts_with_single( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let Some((_, mut char)) = self.next_char() else { return Err(LexerError::UnfinishedCharacter { span: token_start_offset..self.stream.offset(), @@ -351,7 +367,10 @@ impl<'a> LexerState<'a> { }); } - Ok(Some((token_start_offset, Token::Character(char), idx))) + Ok(Some(LocatedToken { + token: Token::Character(char), + span: token_start_offset..idx, + })) } fn get_escaped_character(&mut self, token_start_offset: usize) -> Result { @@ -425,12 +444,17 @@ impl<'a> LexerState<'a> { fn starts_with_double( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { let mut result = String::new(); while let Some((idx, char)) = self.next_char() { match char { - '"' => return Ok(Some((token_start_offset, Token::String(result), idx))), + '"' => { + return Ok(Some(LocatedToken { + token: Token::String(result), + span: token_start_offset..idx, + })); + } '\\' => result.push(self.get_escaped_character(idx)?), @@ -446,12 +470,18 @@ impl<'a> LexerState<'a> { fn starts_with_dash( &mut self, token_start_offset: usize, - ) -> Result, LexerError> { + ) -> Result, LexerError> { match self.next_char() { - None => Ok(Some((token_start_offset, Token::OperatorName("-".into()), token_start_offset))), - Some((end, '>')) => Ok(Some((token_start_offset, Token::Arrow, end))), - Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => - self.parse_identifier( + None => Ok(Some(LocatedToken { + token: Token::OperatorName("-".into()), + span: token_start_offset..token_start_offset + 1, + })), + Some((end, '>')) => Ok(Some(LocatedToken { 
+ token: Token::Arrow, + span: token_start_offset..end, + })), + Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => self + .parse_identifier( token_start_offset, format!("-{c}"), |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(), @@ -459,7 +489,10 @@ impl<'a> LexerState<'a> { ), Some((idx, c)) => { self.stash_char(idx, c); - Ok(Some((token_start_offset, Token::OperatorName("-".into()), idx))) + Ok(Some(LocatedToken { + token: Token::OperatorName("-".into()), + span: token_start_offset..idx, + })) } } } @@ -474,7 +507,7 @@ proptest::proptest! { let initial_token = tokens.next() .expect("Can get a token without an error.") .expect("Can get a valid token.") - .1; + .token; proptest::prop_assert_eq!(token, initial_token); proptest::prop_assert!(tokens.next().is_none()); @@ -488,7 +521,7 @@ fn parsed_single_token(s: &str) -> Token { .next() .expect(format!("Can get at least one token from {s:?}").as_str()) .expect("Can get a valid token.") - .1; + .token; assert!( tokens.next().is_none(), @@ -608,7 +641,7 @@ fn operators_work_as_expected() { #[test] fn can_separate_pieces() { let mut lexer = Lexer::from("a-b"); - let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::OperatorName("-".into())), next_token()); @@ -616,7 +649,7 @@ fn can_separate_pieces() { assert_eq!(None, next_token()); let mut lexer = Lexer::from("a--b"); - let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::OperatorName("--".into())), next_token()); @@ -624,7 +657,7 @@ fn can_separate_pieces() { assert_eq!(None, next_token()); let mut lexer = 
Lexer::from("a - -b"); - let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::ValueName("a".into())), next_token()); assert_eq!(Some(Token::OperatorName("-".into())), next_token()); -- 2.53.0 From e9fb4fcd0f05c4f9ab06cd61f8011eec625bbc24 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sun, 7 Sep 2025 20:48:19 -0700 Subject: [PATCH 09/33] Ignore proptest droppings. --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9c98879..70e8cf0 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,6 @@ bang # Added by cargo - +/proptest-regressions /target .aider* -- 2.53.0 From 4362d8203459a22bf30d23c4ff933b53cf038e58 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Fri, 26 Sep 2025 09:24:56 -0700 Subject: [PATCH 10/33] Most base expressions work. --- Cargo.lock | 271 ++++---------- src/syntax.rs | 221 ++++-------- src/syntax/location.rs | 48 +++ src/syntax/name.rs | 60 ++++ src/syntax/parse.rs | 703 ++++++++++++++++++++++++++++++++++++- src/syntax/parser_tests.rs | 519 +++++++++++++++++++++++++++ 6 files changed, 1449 insertions(+), 373 deletions(-) create mode 100644 src/syntax/location.rs create mode 100644 src/syntax/name.rs create mode 100644 src/syntax/parser_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 38cd685..a80b059 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,15 +36,15 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitflags" -version = "2.9.1" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "codespan" @@ -69,12 +69,12 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys", ] [[package]] @@ -109,15 +109,15 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.175" +version = "0.2.176" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" [[package]] name = "linux-raw-sys" -version = "0.9.4" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "num-traits" @@ -145,18 +145,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.97" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" +checksum = 
"2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce" dependencies = [ "bit-set", "bit-vec", @@ -244,21 +244,21 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "rustix" -version = "1.0.8" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.60.2", + "windows-sys", ] [[package]] @@ -275,18 +275,28 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.227" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.227" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.227" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04" dependencies = [ "proc-macro2", "quote", @@ -295,9 +305,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.104" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -306,15 +316,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.20.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", "getrandom", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys", ] [[package]] @@ -328,18 +338,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.14" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.14" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" dependencies = [ "proc-macro2", "quote", @@ -354,15 +364,15 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = 
"4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" [[package]] name = "wait-timeout" @@ -375,191 +385,66 @@ dependencies = [ [[package]] name = "wasi" -version = "0.14.2+wasi-0.2.4" +version = "0.14.7+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" dependencies = [ - "wit-bindgen-rt", + "wasip2", +] + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", ] [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys", ] +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.61.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" dependencies = [ - "windows-targets 0.52.6", + "windows-link", ] [[package]] -name = "windows-sys" -version = "0.60.2" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.2", -] - -[[package]] -name = "windows-targets" 
-version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", -] - -[[package]] -name = "windows-targets" -version = "0.53.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" -dependencies = [ - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - 
-[[package]] -name = "windows_i686_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_i686_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", diff --git a/src/syntax.rs b/src/syntax.rs index dc3f7c4..fac81e6 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,46 +1,17 @@ mod error; +mod location; +mod name; mod parse; +#[cfg(test)] +mod parser_tests; pub mod tokens; -#[cfg(test)] -use crate::syntax::error::ParserError; -use crate::syntax::parse::Parser; -#[cfg(test)] -use crate::syntax::tokens::Lexer; -use codespan_reporting::diagnostic::Label; +pub use location::{Located, Location}; +pub use name::Name; use proptest_derive::Arbitrary; -use std::cmp::{max, min}; use std::fmt::Debug; use std::ops::Range; -#[derive(Debug)] -pub struct Location { - file_id: usize, - span: Range, -} - -impl 
Location { - pub fn new(file_id: usize, span: Range) -> Self { - Location { file_id, span } - } - - pub fn extend_to(&self, other: &Location) -> Location { - assert_eq!(self.file_id, other.file_id); - Location { - file_id: self.file_id, - span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end), - } - } - - pub fn primary_label(&self) -> Label { - Label::primary(self.file_id, self.span.clone()) - } - - pub fn secondary_label(&self) -> Label { - Label::secondary(self.file_id, self.span.clone()) - } -} - #[derive(Debug)] pub struct Module { definitions: Vec, @@ -54,6 +25,12 @@ pub struct Definition { definition: Def, } +impl Located for Definition { + fn location(&self) -> Location { + self.location.clone() + } +} + #[derive(Debug)] pub enum Def { Enumeration(EnumerationDef), @@ -62,28 +39,29 @@ pub enum Def { Value(ValueDef), } -impl Def { - fn location(&self) -> &Location { +impl Located for Def { + fn location(&self) -> Location { match self { - Def::Enumeration(def) => &def.location, - Def::Structure(def) => &def.location, - Def::Function(def) => &def.location, - Def::Value(def) => &def.location, + Def::Enumeration(def) => def.location.clone(), + Def::Structure(def) => def.location.clone(), + Def::Function(def) => def.location.clone(), + Def::Value(def) => def.location.clone(), } } } #[derive(Debug)] pub struct EnumerationDef { + name: String, location: Location, - options: Vec, + variants: Vec, } #[derive(Debug)] pub struct EnumerationVariant { location: Location, name: String, - arguments: Vec, + argument: Option, } #[derive(Debug)] @@ -95,8 +73,10 @@ pub struct StructureDef { #[derive(Debug)] pub struct StructureField { + location: Location, + export: ExportClass, name: String, - field_type: Type, + field_type: Option, } #[derive(Debug)] @@ -118,7 +98,7 @@ pub struct FunctionArg { pub struct ValueDef { name: String, location: Location, - value: Value, + value: Expression, } #[derive(Debug)] @@ -142,7 +122,16 @@ pub struct BindingStmt { 
#[derive(Debug)] pub enum Expression { - Value(Value), + Value(ConstantValue), + Reference(Name), + EnumerationValue(Name, Name, Option>), + StructureValue(Name, Vec), +} + +#[derive(Debug)] +pub struct FieldValue { + field: Name, + value: Expression, } #[derive(Debug)] @@ -160,9 +149,8 @@ impl TypeRestrictions { #[derive(Debug)] pub struct TypeRestriction { - location: Location, - class: String, - variables: Vec, + constructor: Type, + arguments: Vec, } #[derive(Debug)] @@ -174,9 +162,28 @@ pub enum Type { Function(Vec, Box), } -#[derive(Debug)] -pub enum Value { - Constant(ConstantValue), +impl Located for Type { + fn location(&self) -> Location { + match self { + Type::Constructor(l, _) => l.clone(), + Type::Variable(l, _) => l.clone(), + Type::Primitive(l, _) => l.clone(), + Type::Application(t1, ts) => { + let mut result = t1.location(); + if let Some(last) = ts.last() { + result = result.extend_to(&last.location()); + } + result + } + Type::Function(args, ret) => { + if let Some(first) = args.first() { + first.location().extend_to(&ret.location()) + } else { + ret.location() + } + } + } + } } #[derive(Debug)] @@ -198,113 +205,3 @@ pub struct IntegerWithBase { base: Option, value: u64, } - -#[test] -fn can_parse_constants() { - let parse_constant = |str| { - let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); - result.parse_constant() - }; - - assert!(matches!( - parse_constant("16"), - Ok(ConstantValue::Integer( - _, - IntegerWithBase { - base: None, - value: 16, - } - )) - )); - assert!(matches!( - parse_constant("0x10"), - Ok(ConstantValue::Integer( - _, - IntegerWithBase { - base: Some(16), - value: 16, - } - )) - )); - assert!(matches!( - parse_constant("0o20"), - Ok(ConstantValue::Integer( - _, - IntegerWithBase { - base: Some(8), - value: 16, - } - )) - )); - assert!(matches!( - parse_constant("0b10000"), - Ok(ConstantValue::Integer( - _, - IntegerWithBase { - base: Some(2), - value: 16, - } - )) - )); - assert!( - 
matches!(parse_constant("\"foo\""), Ok(ConstantValue::String(_, x)) - if x == "foo") - ); - assert!(matches!( - parse_constant("'f'"), - Ok(ConstantValue::Character(_, 'f')) - )); -} - -#[test] -fn can_parse_types() { - let parse_type = |str| { - let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); - result.parse_type() - }; - - assert!(matches!( - parse_type("Cons"), - Ok(Type::Application(cons, empty)) if - matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && - empty.is_empty() - )); - assert!(matches!( - parse_type("cons"), - Ok(Type::Variable(_, c)) if c == "cons" - )); - assert!(matches!( - parse_type("Cons a b"), - Ok(Type::Application(a, b)) - if matches!(a.as_ref(), Type::Constructor(_, c) if c == "Cons") && - matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] - if b1 == "a" && b2 == "b") - )); - println!("------"); - println!("result: {:?}", parse_type("a -> z")); - println!("------"); - assert!(matches!( - parse_type("a -> z"), - Ok(Type::Function(a, z)) - if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") && - matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") - )); - assert!(matches!( - parse_type("a b -> z"), - Ok(Type::Function(a, z)) - if matches!(a.as_slice(), [Type::Variable(_, a1), Type::Variable(_, b1)] - if a1 == "a" && b1 == "b") && - matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") - )); - assert!(matches!( - parse_type("Cons a b -> z"), - Ok(Type::Function(a, z)) - if matches!(a.as_slice(), [Type::Application(cons, appargs)] - if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && - matches!(appargs.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] - if b1 == "a" && b2 == "b")) && - matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") - )); -} diff --git a/src/syntax/location.rs b/src/syntax/location.rs new file mode 100644 index 0000000..fe4a352 --- /dev/null +++ b/src/syntax/location.rs @@ -0,0 +1,48 @@ +use 
codespan_reporting::diagnostic::Label; +use std::cmp::{max, min}; +use std::ops::Range; + +pub trait Located { + fn location(&self) -> Location; +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Location { + file_id: usize, + span: Range, +} + +impl Location { + pub fn new(file_id: usize, span: Range) -> Self { + Location { file_id, span } + } + + pub fn extend_to(&self, other: &Location) -> Location { + assert_eq!(self.file_id, other.file_id); + Location { + file_id: self.file_id, + span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end), + } + } + + pub fn merge_span(mut self, span: Range) -> Location { + self.span = min(self.span.start, span.start)..max(self.span.end, span.end); + self + } + + pub fn file_id(&self) -> usize { + self.file_id + } + + pub fn span(&self) -> Range { + self.span.clone() + } + + pub fn primary_label(&self) -> Label { + Label::primary(self.file_id, self.span.clone()) + } + + pub fn secondary_label(&self) -> Label { + Label::secondary(self.file_id, self.span.clone()) + } +} diff --git a/src/syntax/name.rs b/src/syntax/name.rs new file mode 100644 index 0000000..5dd90e0 --- /dev/null +++ b/src/syntax/name.rs @@ -0,0 +1,60 @@ +use crate::syntax::{Located, Location}; +use std::cmp; +use std::fmt; +use std::hash; +use std::sync::atomic::{AtomicU64, Ordering}; + +static IDENTIFIER_COUNTER: AtomicU64 = AtomicU64::new(0); + +#[derive(Debug)] +pub struct Name { + printable: String, + identifier: u64, + location: Option, +} + +impl cmp::PartialEq for Name { + fn eq(&self, other: &Self) -> bool { + self.identifier == other.identifier + } +} + +impl cmp::Eq for Name {} + +impl hash::Hash for Name { + fn hash(&self, state: &mut H) { + self.identifier.hash(state); + } +} + +impl fmt::Display for Name { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", self.printable, self.identifier) + } +} + +impl Name { + pub fn new(location: Location, s: S) -> Name { + let my_id = 
IDENTIFIER_COUNTER.fetch_add(1, Ordering::SeqCst); + Name { + printable: s.to_string(), + identifier: my_id, + location: Some(location), + } + } + + pub fn gensym(base: &'static str) -> Name { + let formatted = format!("<{base}>"); + let my_id = IDENTIFIER_COUNTER.fetch_add(1, Ordering::SeqCst); + + Name { + printable: formatted, + identifier: my_id, + location: None, + } + } + + pub fn as_printed(&self) -> &str { + self.printable.as_str() + } +} diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 983bde7..0ee635c 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -46,9 +46,651 @@ impl<'a> Parser<'a> { } fn to_location(&self, span: Range) -> Location { - Location { + Location::new(self.file_id, span) + } + + pub fn parse_module(&mut self) -> Result { + let mut definitions = vec![]; + + loop { + let next_token = self.next()?; + + if next_token.is_none() { + return Ok(Module { definitions }); + } + + definitions.push(self.parse_definition()?); + } + } + + pub fn parse_definition(&mut self) -> Result { + let (export, start) = self.parse_export_class()?; + let type_restrictions = self.parse_type_restrictions()?; + let definition = self.parse_def()?; + let location = definition.location().merge_span(start); + + Ok(Definition { + location, + export, + type_restrictions, + definition, + }) + } + + fn parse_export_class(&mut self) -> Result<(ExportClass, Range), ParserError> { + let maybe_export = self + .next()? + .ok_or_else(|| self.bad_eof("looking for possible export"))?; + + if matches!(maybe_export.token, Token::ValueName(ref x) if x == "export") { + Ok((ExportClass::Public, maybe_export.span)) + } else { + let start = maybe_export.span.clone(); + self.save(maybe_export); + Ok((ExportClass::Private, start)) + } + } + + pub fn parse_type_restrictions(&mut self) -> Result { + let Some(maybe_restrict) = self.next()? 
else { + return Ok(TypeRestrictions::empty()); + }; + + if !matches!(maybe_restrict.token, Token::ValueName(ref x) if x == "restrict") { + self.save(maybe_restrict); + return Ok(TypeRestrictions::empty()); + } + + let maybe_paren = self + .next()? + .ok_or_else(|| self.bad_eof("Looking for open paren after restrict"))?; + + if !matches!(maybe_paren.token, Token::OpenParen) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_paren.span, + token: maybe_paren.token, + expected: "open parenthesis, following the restrict keyword", + }); + } + + let mut restrictions = vec![]; + + while let Some(type_restriction) = self.parse_type_restriction()? { + restrictions.push(type_restriction); + } + + let maybe_paren = self + .next()? + .ok_or_else(|| self.bad_eof("Looking for open paren after restrict"))?; + if !matches!(maybe_paren.token, Token::CloseParen) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_paren.span, + token: maybe_paren.token, + expected: "close parenthesis following type restrictions", + }); + } + + Ok(TypeRestrictions { restrictions }) + } + + fn parse_type_restriction(&mut self) -> Result, ParserError> { + let maybe_constructor = self + .next()? 
+ .ok_or_else(|| self.bad_eof("Looking for constructor for type restriction"))?; + + let constructor = match maybe_constructor.token { + Token::TypeName(str) => { + Type::Constructor(self.to_location(maybe_constructor.span), str) + } + Token::PrimitiveTypeName(str) => { + Type::Primitive(self.to_location(maybe_constructor.span), str) + } + + token @ Token::CloseParen | token @ Token::Comma => { + self.save(LocatedToken { + token, + span: maybe_constructor.span, + }); + return Ok(None); + } + + weird => { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_constructor.span, + token: weird, + expected: "Constructor name, comma, or close parenthesis in type restriction", + }); + } + }; + + let mut arguments = vec![]; + + while let Ok(t) = self.parse_base_type() { + arguments.push(t); + } + + let restriction = TypeRestriction { + constructor, + arguments, + }; + + let Some(maybe_comma) = self.next()? else { + return Ok(Some(restriction)); + }; + + match maybe_comma.token { + Token::Comma => {} + _ => self.save(maybe_comma), + } + + Ok(Some(restriction)) + } + + fn parse_def(&mut self) -> Result { + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for definition body"))?; + + if let Ok(structure) = self.parse_structure() { + return Ok(Def::Structure(structure)); + } + + if let Ok(enumeration) = self.parse_enumeration() { + return Ok(Def::Enumeration(enumeration)); + } + + if let Ok(fun_or_val) = self.parse_function_or_value() { + return Ok(fun_or_val); + } + + Err(ParserError::UnexpectedToken { file_id: self.file_id, - span, + span: next.span, + token: next.token, + expected: "'structure', 'enumeration', or a value identifier", + }) + } + + pub fn parse_structure(&mut self) -> Result { + let structure_token = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for definition"))?; + if !matches!(structure_token.token, Token::ValueName(ref s) if s == "structure") { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: structure_token.span, + token: structure_token.token, + expected: "the 'structure' keyword", + }); + } + + let name = self + .next()? + .ok_or_else(|| self.bad_eof("looking for structure name"))?; + let structure_name = match name.token { + Token::TypeName(str) => str, + _ => { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: name.span, + token: name.token, + expected: "a structure name", + }); + } + }; + + let brace = self + .next()? + .ok_or_else(|| self.bad_eof("the open brace after a structure name"))?; + if !matches!(brace.token, Token::OpenBrace) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: brace.span, + token: brace.token, + expected: "the brace after a structure name", + }); + } + + let mut fields = vec![]; + + while let Some(field_definition) = self.parse_field_definition()? { + fields.push(field_definition); + } + + let brace = self.next()?.ok_or_else(|| { + self.bad_eof("the close brace after at the end of a structure definition") + })?; + if !matches!(brace.token, Token::CloseBrace) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: brace.span, + token: brace.token, + expected: "the brace at the end of a structure definition", + }); + } + + let location = self + .to_location(structure_token.span) + .extend_to(&self.to_location(brace.span)); + + Ok(StructureDef { + name: structure_name, + location, + fields, + }) + } + + pub fn parse_field_value(&mut self) -> Result, ParserError> { + let maybe_name = self + .next()? 
+ .ok_or_else(|| self.bad_eof("parsing field definition"))?; + + let field = match maybe_name.token { + Token::ValueName(x) => Name::new(self.to_location(maybe_name.span), x), + _ => { + self.save(maybe_name.clone()); + return Ok(None); + } + }; + + let maybe_colon = self.next()?.ok_or_else(|| { + self.bad_eof("looking for colon, comma, or close brace after field name") + })?; + if !matches!(maybe_colon.token, Token::Colon) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_colon.span, + token: maybe_colon.token, + expected: "colon after field name in constructor", + }); + } + + let value = self.parse_expression()?; + + let end_token = self.next()?.ok_or_else(|| { + self.bad_eof("looking for comma or close brace after field definition") + })?; + if !matches!(end_token.token, Token::Comma) { + self.save(end_token); + } + + Ok(Some(FieldValue { field, value })) + } + + pub fn parse_field_definition(&mut self) -> Result, ParserError> { + let (export, start) = self.parse_export_class()?; + let maybe_name = self + .next()? 
+ .ok_or_else(|| self.bad_eof("parsing field definition"))?; + + let name = match maybe_name.token { + Token::ValueName(x) => x, + _ => { + self.save(maybe_name.clone()); + if matches!(export, ExportClass::Private) { + return Ok(None); + } else { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_name.span, + token: maybe_name.token, + expected: "a field name", + }); + } + } + }; + let start_location = self.to_location(start); + + let maybe_colon = self.next()?.ok_or_else(|| { + self.bad_eof("looking for colon, comma, or close brace after field name") + })?; + + let field_type = match maybe_colon.token { + Token::Comma | Token::CloseBrace => { + self.save(maybe_colon); + None + } + + Token::Colon => Some(self.parse_type()?), + + _ => { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_colon.span, + token: maybe_colon.token, + expected: "colon, comma, or close brace after field name", + }); + } + }; + + let end_token = self.next()?.ok_or_else(|| { + self.bad_eof("looking for comma or close brace after field definition") + })?; + + let maybe_end_location = match end_token.token { + Token::Comma => Some(self.to_location(end_token.span)), + Token::CloseBrace => { + self.save(end_token); + None + } + _ => { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: end_token.span, + token: end_token.token, + expected: "looking for comma or close brace after field definition", + }); + } + }; + + let end_location = maybe_end_location + .or_else(|| field_type.as_ref().map(|x| x.location())) + .unwrap_or_else(|| self.to_location(maybe_name.span)); + let location = start_location.extend_to(&end_location); + + Ok(Some(StructureField { + location, + export, + name, + field_type, + })) + } + + pub fn parse_enumeration(&mut self) -> Result { + let enumeration_token = self + .next()? 
+            .ok_or_else(|| self.bad_eof("looking for definition"))?;
+        if !matches!(enumeration_token.token, Token::ValueName(ref e) if e == "enumeration") {
+            return Err(ParserError::UnexpectedToken {
+                file_id: self.file_id,
+                span: enumeration_token.span,
+                token: enumeration_token.token,
+                expected: "the 'enumeration' keyword",
+            });
+        }
+
+        let name = self
+            .next()?
+            .ok_or_else(|| self.bad_eof("looking for enumeration name"))?;
+        let enumeration_name = match name.token {
+            Token::TypeName(str) => str,
+            _ => {
+                return Err(ParserError::UnexpectedToken {
+                    file_id: self.file_id,
+                    span: name.span,
+                    token: name.token,
+                    expected: "an enumeration name",
+                });
+            }
+        };
+
+        let brace = self
+            .next()?
+            .ok_or_else(|| self.bad_eof("the open brace after an enumeration name"))?;
+        if !matches!(brace.token, Token::OpenBrace) {
+            return Err(ParserError::UnexpectedToken {
+                file_id: self.file_id,
+                span: brace.span,
+                token: brace.token,
+                expected: "the brace after an enumeration name",
+            });
+        }
+
+        let mut variants = vec![];
+
+        while let Some(variant_definition) = self.parse_enum_variant()? {
+            variants.push(variant_definition);
+        }
+
+        let brace = self.next()?.ok_or_else(|| {
+            self.bad_eof("the close brace at the end of an enumeration definition")
+        })?;
+        if !matches!(brace.token, Token::CloseBrace) {
+            return Err(ParserError::UnexpectedToken {
+                file_id: self.file_id,
+                span: brace.span,
+                token: brace.token,
+                expected: "the brace at the end of an enumeration definition",
+            });
+        }
+
+        let location = self
+            .to_location(enumeration_token.span)
+            .extend_to(&self.to_location(brace.span));
+
+        Ok(EnumerationDef {
+            name: enumeration_name,
+            location,
+            variants,
+        })
+    }
+
+    pub fn parse_enum_variant(&mut self) -> Result<Option<EnumerationVariant>, ParserError> {
+        let maybe_name = self
+            .next()?
+ .ok_or_else(|| self.bad_eof("looking for enumeration name"))?; + let name = match maybe_name.token { + Token::TypeName(x) => x, + Token::CloseBrace => { + self.save(maybe_name); + return Ok(None); + } + _ => { + self.save(maybe_name.clone()); + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_name.span, + token: maybe_name.token, + expected: "variant name (identifier starting with a capital)", + }); + } + }; + let start_location = self.to_location(maybe_name.span); + + let maybe_paren = self + .next()? + .ok_or_else(|| self.bad_eof("trying to understand enumeration variant"))?; + let (argument, arg_location) = if matches!(maybe_paren.token, Token::OpenParen) { + let t = self.parse_type()?; + + let maybe_close = self + .next()? + .ok_or_else(|| self.bad_eof("trying to parse a enumeration variant's type"))?; + if !matches!(maybe_close.token, Token::CloseParen) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_close.span, + token: maybe_close.token, + expected: "close paren to end an enumeration variant's type argument", + }); + } + + let location = t.location(); + (Some(t), location) + } else { + self.save(maybe_paren); + (None, start_location.clone()) + }; + + let ender = self.next()?.ok_or_else(|| { + self.bad_eof("looking for comma or close brace after enumeration variant") + })?; + let end_location = match ender.token { + Token::Comma => self.to_location(ender.span), + Token::CloseBrace => { + self.save(ender); + arg_location + } + _ => { + self.save(ender.clone()); + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: ender.span, + token: ender.token, + expected: "comma or close brace after enumeration variant", + }); + } + }; + + let location = start_location.extend_to(&end_location); + + Ok(Some(EnumerationVariant { + name, + location, + argument, + })) + } + + pub fn parse_function_or_value(&mut self) -> Result { + unimplemented!() + } + + pub fn parse_expression(&mut 
self) -> Result { + self.parse_base_expression() + } + + pub fn parse_base_expression(&mut self) -> Result { + if let Ok(v) = self.parse_constant() { + return Ok(Expression::Value(v)); + } + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for an expression"))?; + + match next.token { + Token::OpenBrace => unimplemented!(), + + Token::OpenParen => { + let inner = self.parse_expression()?; + let hopefully_close = self + .next()? + .ok_or_else(|| self.bad_eof("looking for close paren to finish expression"))?; + if matches!(hopefully_close.token, Token::CloseParen) { + Ok(inner) + } else { + self.save(hopefully_close.clone()); + Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: hopefully_close.span, + token: hopefully_close.token, + expected: "close paren after expression", + }) + } + } + + Token::TypeName(n) | Token::PrimitiveTypeName(n) => { + let type_name = Name::new(self.to_location(next.span), n); + let after_type_name = self.next()?.ok_or_else(|| { + self.bad_eof("looking for colon, open brace, or open paren in constructor") + })?; + + match after_type_name.token { + Token::OpenBrace => { + let mut fields = vec![]; + + while let Some(field) = self.parse_field_value()? 
{ + fields.push(field); + } + + let closer = self.next()?.ok_or_else(|| { + self.bad_eof("looking for close brace in structure value") + })?; + if !matches!(closer.token, Token::CloseBrace) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: closer.span, + token: closer.token, + expected: "close brace or comma after field value", + }); + } + + Ok(Expression::StructureValue(type_name, fields)) + } + + Token::Colon => { + let second_colon = self.next()?.ok_or_else(|| { + self.bad_eof("looking for second colon in enumeration value") + })?; + if !matches!(second_colon.token, Token::Colon) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: second_colon.span, + token: second_colon.token, + expected: "second colon in enumeration value", + }); + } + + let vname = self + .next()? + .ok_or_else(|| self.bad_eof("looking for enumeration value name"))?; + + let value_name = match vname.token { + Token::TypeName(s) => { + let loc = self.to_location(vname.span); + Name::new(loc, s) + } + + _ => { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: vname.span, + token: vname.token, + expected: "enumeration value name", + }); + } + }; + + let arg = if let Some(maybe_paren) = self.next()? 
{ + let expr = self.parse_expression()?; + + let tok = self.next()?.ok_or_else(|| { + self.bad_eof("looking for close paren after enum value argument") + })?; + if !matches!(tok.token, Token::CloseParen) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: tok.span, + token: tok.token, + expected: "close paren after enum value argument", + }); + } + + Some(Box::new(expr)) + } else { + None + }; + + Ok(Expression::EnumerationValue(type_name, value_name, arg)) + } + + _ => Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: after_type_name.span, + token: after_type_name.token, + expected: "colon, open brace, or open paren in constructor", + }), + } + } + + Token::ValueName(n) | Token::PrimitiveValueName(n) => { + let location = self.to_location(next.span); + let name = Name::new(location, n); + Ok(Expression::Reference(name)) + } + + _ => { + self.save(next.clone()); + Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "some base expression or an open brace", + }) + } } } @@ -60,12 +702,10 @@ impl<'a> Parser<'a> { let mut args = Vec::new(); while let Ok(t) = self.parse_type_application() { - println!("got argument type: {t:?}"); args.push(t); } let Some(maybe_arrow) = self.next()? 
else { - println!("no arrow token"); match args.pop() { None => { return Err(ParserError::UnacceptableEof { @@ -86,11 +726,10 @@ impl<'a> Parser<'a> { }; if maybe_arrow.token == Token::Arrow { - println!("found function arrow"); let right = self.parse_function_type()?; Ok(Type::Function(args, Box::new(right))) } else if args.len() == 1 { - println!("found non function arrow token {}", maybe_arrow.token); + self.save(maybe_arrow); Ok(args.pop().expect("length = 1 works")) } else { self.save(maybe_arrow.clone()); @@ -113,7 +752,6 @@ impl<'a> Parser<'a> { Token::TypeName(x) => Type::Constructor(self.to_location(span), x), Token::PrimitiveTypeName(x) => Type::Primitive(self.to_location(span), x), _ => { - println!("saving {token}"); self.save(LocatedToken { token, span }); return self.parse_base_type(); } @@ -136,6 +774,23 @@ impl<'a> Parser<'a> { Token::TypeName(x) => Ok(Type::Constructor(self.to_location(span), x)), Token::PrimitiveTypeName(x) => Ok(Type::Primitive(self.to_location(span), x)), Token::ValueName(x) => Ok(Type::Variable(self.to_location(span), x)), + Token::OpenParen => { + let t = self.parse_type()?; + let closer = self + .next()? + .ok_or_else(|| self.bad_eof("close paren in type"))?; + + if !matches!(closer.token, Token::CloseParen) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: closer.span, + token: closer.token, + expected: "close parenthesis to finish a type", + }); + } + + Ok(t) + } token => { self.save(LocatedToken { token: token.clone(), @@ -153,20 +808,32 @@ impl<'a> Parser<'a> { } pub fn parse_constant(&mut self) -> Result { - let LocatedToken { token, span } = self + let maybe_constant = self .next()? 
.ok_or_else(|| self.bad_eof("looking for a constant"))?; - match token { - Token::Integer(iwb) => Ok(ConstantValue::Integer(self.to_location(span), iwb)), - Token::Character(c) => Ok(ConstantValue::Character(self.to_location(span), c)), - Token::String(s) => Ok(ConstantValue::String(self.to_location(span), s)), - _ => Err(ParserError::UnexpectedToken { - file_id: self.file_id, - span, - token, - expected: "constant value", - }), + match maybe_constant.token { + Token::Integer(iwb) => Ok(ConstantValue::Integer( + self.to_location(maybe_constant.span), + iwb, + )), + Token::Character(c) => Ok(ConstantValue::Character( + self.to_location(maybe_constant.span), + c, + )), + Token::String(s) => Ok(ConstantValue::String( + self.to_location(maybe_constant.span), + s, + )), + _ => { + self.save(maybe_constant.clone()); + Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: maybe_constant.span, + token: maybe_constant.token, + expected: "constant value", + }) + } } } } diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs new file mode 100644 index 0000000..bb3ce5a --- /dev/null +++ b/src/syntax/parser_tests.rs @@ -0,0 +1,519 @@ +use crate::syntax::parse::Parser; +use crate::syntax::tokens::Lexer; +use crate::syntax::*; + +#[test] +fn constants() { + let parse_constant = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_constant() + }; + + assert!(matches!( + parse_constant("16"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: None, + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0x10"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(16), + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0o20"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(8), + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0b10000"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(2), + value: 16, + } + )) 
+ )); + assert!( + matches!(parse_constant("\"foo\""), Ok(ConstantValue::String(_, x)) + if x == "foo") + ); + assert!(matches!( + parse_constant("'f'"), + Ok(ConstantValue::Character(_, 'f')) + )); +} + +#[test] +fn types() { + let parse_type = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_type() + }; + + assert!(matches!( + parse_type("Cons"), + Ok(Type::Application(cons, empty)) if + matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && + empty.is_empty() + )); + assert!(matches!( + parse_type("cons"), + Ok(Type::Variable(_, c)) if c == "cons" + )); + assert!(matches!( + parse_type("Cons a b"), + Ok(Type::Application(a, b)) + if matches!(a.as_ref(), Type::Constructor(_, c) if c == "Cons") && + matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] + if b1 == "a" && b2 == "b") + )); + assert!(matches!( + parse_type("a -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + )); + println!("-------------"); + println!("{:?}", parse_type("(a -> z)")); + println!("-------------"); + assert!(matches!( + parse_type("(a -> z)"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + )); + assert!(matches!( + parse_type("a b -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1), Type::Variable(_, b1)] + if a1 == "a" && b1 == "b") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + )); + assert!(matches!( + parse_type("Cons a b -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Application(cons, appargs)] + if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && + matches!(appargs.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] + if b1 == "a" && b2 == "b")) && + matches!(z.as_ref(), Type::Variable(_, 
z1) if z1 == "z") + )); +} + +#[test] +fn type_restrictions() { + let parse_tr = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_type_restrictions() + }; + + assert!(matches!( + parse_tr("restrict()"), + Ok(TypeRestrictions{ restrictions }) if restrictions.is_empty() + )); + + assert!(matches!( + parse_tr("restrict(Cons a b)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 1 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x == "b")))); + + assert!(matches!( + parse_tr("restrict(Cons a b,)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 1 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x == "b")))); + + assert!(matches!(parse_tr("restrict(,Cons a b,)"), Err(_))); + + assert!(matches!( + parse_tr("restrict(Cons a b, Monad m)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 2 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x == "b")) && + matches!(&restrictions[1], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x == "Monad") && + arguments.len() == 1 && + matches!(&arguments[0], Type::Variable(_, x) if x == "m")))); + + assert!(matches!( + parse_tr("restrict(Cons a b, Monad m,)"), + 
Ok(TypeRestrictions { restrictions }) if restrictions.len() == 2 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x == "b")) && + matches!(&restrictions[1], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x == "Monad") && + arguments.len() == 1 && + matches!(&arguments[0], Type::Variable(_, x) if x == "m")))); +} + +#[test] +fn field_definition() { + let parse_fd = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_field_definition() + }; + + assert!(matches!(parse_fd("foo"), Err(_),)); + assert!(matches!( + parse_fd("foo,"), + Ok(Some(StructureField{ name, export: ExportClass::Private, field_type: None, .. })) + if name == "foo" + )); + assert!(matches!( + parse_fd("foo}"), + Ok(Some(StructureField{ name, export: ExportClass::Private, field_type: None, .. })) + if name == "foo" + )); + + assert!(matches!( + parse_fd("foo: Word8,"), + Ok(Some(StructureField{ name, field_type, .. })) + if name == "foo" && + matches!(&field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Word8") && + args.is_empty()))); + + assert!(matches!( + parse_fd("foo: Cons a b,"), + Ok(Some(StructureField{ name, field_type, .. })) + if name == "foo" && + matches!(&field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Cons") && + matches!(&args.as_slice(), &[Type::Variable(_, v1), Type::Variable(_, v2)] + if v1 == "a" && v2 == "b")))); + + assert!(matches!( + parse_fd("foo: a -> b,"), + Ok(Some(StructureField{ name, field_type, .. 
})) + if name == "foo" && + matches!(&field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a == "a") && + matches!(ret.as_ref(), Type::Variable(_, b) if b == "b")))); + + assert!(matches!( + parse_fd("export foo: a -> b,"), + Ok(Some(StructureField{ name, export: ExportClass::Public, field_type, .. })) + if name == "foo" && + matches!(&field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a == "a") && + matches!(ret.as_ref(), Type::Variable(_, b) if b == "b")))); +} + +#[test] +fn structures() { + let parse_st = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_structure() + }; + + assert!(matches!(parse_st("structure { }"), Err(_))); + assert!(matches!(parse_st("structure {"), Err(_))); + assert!(matches!(parse_st("structure foo {}"), Err(_))); + + assert!(matches!( + parse_st("structure Foo {}"), + Ok(StructureDef { name, fields, .. }) + if name == "Foo" && fields.is_empty())); + + assert!(matches!( + parse_st("structure Foo { bar }"), + Ok(StructureDef { name, fields, .. }) + if name == "Foo" && + matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }] + if name == "bar" && matches!(field_type, None)))); + + assert!(matches!( + parse_st("structure Foo { bar: Word8 }"), + Ok(StructureDef { name, fields, .. }) + if name == "Foo" && + matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }] + if name == "bar" && + matches!(field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Word8") && + args.is_empty())))); + + assert!(matches!( + parse_st("structure Foo { bar: Word8, goo }"), + Ok(StructureDef { name, fields, .. }) + if name == "Foo" && + matches!(fields.as_slice(), + &[StructureField { ref name, ref field_type, .. }, + StructureField { name: ref name2, field_type: None, .. 
}] + if name == "bar" && + name2 == "goo" && + matches!(field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Word8") && + args.is_empty())))); + + assert!(matches!( + parse_st("structure Foo { bar: b c -> a, goo }"), + Ok(StructureDef { name, fields, .. }) + if name == "Foo" && + matches!(fields.as_slice(), + &[StructureField { ref name, ref field_type, .. }, + StructureField { name: ref name2, field_type: None, .. }] + if name == "bar" && + name2 == "goo" && + matches!(field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)] + if b == "b" && c == "c") && + matches!(ret.as_ref(), Type::Variable(_, a) if a == "a"))))); + + assert!(matches!( + parse_st("structure Foo { bar: b c -> a, goo, }"), + Ok(StructureDef { name, fields, .. }) + if name == "Foo" && + matches!(fields.as_slice(), + &[StructureField { ref name, ref field_type, .. }, + StructureField { name: ref name2, field_type: None, .. }] + if name == "bar" && + name2 == "goo" && + matches!(field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)] + if b == "b" && c == "c") && + matches!(ret.as_ref(), Type::Variable(_, a) if a == "a"))))); +} + +#[test] +fn enum_variant() { + let parse_ev = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_enum_variant() + }; + + assert!(matches!(parse_ev("foo"), Err(_),)); + assert!(matches!(parse_ev("foo,"), Err(_),)); + assert!(matches!(parse_ev("Cons foo,"), Err(_),)); + assert!(matches!(parse_ev(""), Err(_))); + + assert!(matches!(parse_ev("}"), Ok(None))); + + assert!(matches!( + parse_ev("Cons,"), + Ok(Some(EnumerationVariant { name, argument, .. })) + if name == "Cons" && argument.is_none())); + assert!(matches!( + parse_ev("Cons }"), + Ok(Some(EnumerationVariant { name, argument, .. 
})) + if name == "Cons" && argument.is_none())); + assert!(matches!( + parse_ev("Cons, }"), + Ok(Some(EnumerationVariant { name, argument, .. })) + if name == "Cons" && argument.is_none())); + + assert!(matches!( + parse_ev("Cons(Pair a),"), + Ok(Some(EnumerationVariant { name, ref argument, .. })) + if name == "Cons" && + matches!(argument, Some(Type::Application(typef, args)) + if matches!(typef.as_ref(), Type::Constructor(_, name) + if name == "Pair") && + matches!(&args.as_slice(), &[Type::Variable(_, argname)] + if argname == "a")))); + assert!(matches!( + parse_ev("Cons(Pair a) }"), + Ok(Some(EnumerationVariant { name, ref argument, .. })) + if name == "Cons" && + matches!(argument, Some(Type::Application(typef, args)) + if matches!(typef.as_ref(), Type::Constructor(_, name) + if name == "Pair") && + matches!(&args.as_slice(), &[Type::Variable(_, argname)] + if argname == "a")))); + + assert!(matches!( + parse_ev("Cons(a b -> c) }"), + Ok(Some(EnumerationVariant { name, ref argument, .. })) + if name == "Cons" && + matches!(argument, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, a), Type::Variable(_, b)] + if a == "a" && b == "b") && + matches!(ret.as_ref(), Type::Variable(_, c) if c == "c")))); +} + +#[test] +fn enumerations() { + let parse_en = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_enumeration() + }; + + assert!(matches!(parse_en("enumeration { }"), Err(_))); + assert!(matches!(parse_en("enumeration {"), Err(_))); + assert!(matches!(parse_en("enumeration"), Err(_))); + + assert!(matches!( + parse_en("enumeration Empty { }"), + Ok(EnumerationDef { name, variants, .. }) + if name == "Empty" && variants.is_empty())); + assert!(matches!( + parse_en("enumeration Alternates { A, B }"), + Ok(EnumerationDef { name, variants, .. 
}) + if name == "Alternates" && + matches!(&variants.as_slice(), &[ + EnumerationVariant { name: name1, argument: arg1, ..}, + EnumerationVariant { name: name2, argument: arg2, ..}, + ] if name1 == "A" && arg1.is_none() && + name2 == "B" && arg2.is_none()))); + assert!(matches!( + parse_en("enumeration Alternates { A, B, }"), + Ok(EnumerationDef { name, variants, .. }) + if name == "Alternates" && + matches!(&variants.as_slice(), &[ + EnumerationVariant { name: name1, argument: arg1, ..}, + EnumerationVariant { name: name2, argument: arg2, ..}, + ] if name1 == "A" && arg1.is_none() && + name2 == "B" && arg2.is_none()))); +} + +#[test] +fn expressions() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_expression() + }; + + assert!(matches!(parse_ex(""), Err(_))); + assert!(matches!( + parse_ex("x"), + Ok(Expression::Reference(n)) if n.as_printed() == "x")); + assert!(matches!( + parse_ex("(x)"), + Ok(Expression::Reference(n)) if n.as_printed() == "x")); + assert!(matches!( + parse_ex("'c'"), + Ok(Expression::Value(ConstantValue::Character(_, _))) + )); + assert!(matches!( + parse_ex("\"c\""), + Ok(Expression::Value(ConstantValue::String(_, _))) + )); + assert!(matches!( + parse_ex("1"), + Ok(Expression::Value(ConstantValue::Integer(_, _))) + )); + assert!(matches!( + parse_ex("(1)"), + Ok(Expression::Value(ConstantValue::Integer(_, _))) + )); +} + +#[test] +fn enumeration_values() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_expression() + }; + + assert!(matches!(parse_ex("Hello::world"), Err(_))); + assert!(matches!( + parse_ex("Hello::World"), + Ok(Expression::EnumerationValue(t, v, None)) + if t.as_printed() == "Hello" && + v.as_printed() == "World")); + assert!(matches!( + parse_ex("Hello::World(a)"), + Ok(Expression::EnumerationValue(t, v, Some(_))) + if t.as_printed() == "Hello" && + v.as_printed() == "World")); +} + 
+#[test] +fn structure_value() { + let parse_st = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_expression() + }; + + assert!(matches!(parse_st("Foo{ , }"), Err(_))); + assert!(matches!(parse_st("Foo{ foo, }"), Err(_))); + assert!(matches!(parse_st("Foo{ foo: , }"), Err(_))); + assert!(matches!(parse_st("Foo{ , foo: 1, }"), Err(_))); + assert!(matches!( + parse_st("Foo{ foo: 1 }"), + Ok(Expression::StructureValue(sname, values)) + if sname.as_printed() == "Foo" && + matches!(values.as_slice(), [FieldValue{ field, value }] + if field.as_printed() == "foo" && + matches!(value, Expression::Value(ConstantValue::Integer(_,_)))))); + assert!(matches!( + parse_st("Foo{ foo: 1, }"), + Ok(Expression::StructureValue(sname, values)) + if sname.as_printed() == "Foo" && + matches!(values.as_slice(), [FieldValue{ field, value }] + if field.as_printed() == "foo" && + matches!(value, Expression::Value(ConstantValue::Integer(_,_)))))); + assert!(matches!( + parse_st("Foo{ foo: 1, bar: \"foo\" }"), + Ok(Expression::StructureValue(sname, values)) + if sname.as_printed() == "Foo" && + matches!(values.as_slice(), [FieldValue{ field: f1, value: v1 }, + FieldValue{ field: f2, value: v2 }] + if f1.as_printed() == "foo" && + f2.as_printed() == "bar" && + matches!(v1, Expression::Value(ConstantValue::Integer(_,_))) && + matches!(v2, Expression::Value(ConstantValue::String(_,_)))))); + assert!(matches!( + parse_st("Foo{ foo: 1, bar: \"foo\", }"), + Ok(Expression::StructureValue(sname, values)) + if sname.as_printed() == "Foo" && + matches!(values.as_slice(), [FieldValue{ field: f1, value: v1 }, + FieldValue{ field: f2, value: v2 }] + if f1.as_printed() == "foo" && + f2.as_printed() == "bar" && + matches!(v1, Expression::Value(ConstantValue::Integer(_,_))) && + matches!(v2, Expression::Value(ConstantValue::String(_,_)))))); + assert!(matches!( + parse_st("Foo{ foo: 1,, bar: \"foo\", }"), + Err(_))); +} -- 2.53.0 From 
c31be288ad4ab8cafb06d139f1a9d0725fcd7f01 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sun, 28 Sep 2025 11:42:11 -0700 Subject: [PATCH 11/33] Calls and infix expressions. --- src/syntax.rs | 9 ++ src/syntax/parse.rs | 140 ++++++++++++++++++++- src/syntax/parser_tests.rs | 252 +++++++++++++++++++++++++++++++++++++ 3 files changed, 400 insertions(+), 1 deletion(-) diff --git a/src/syntax.rs b/src/syntax.rs index fac81e6..dfcf197 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -126,6 +126,15 @@ pub enum Expression { Reference(Name), EnumerationValue(Name, Name, Option>), StructureValue(Name, Vec), + Call(Box, CallKind, Vec), +} + +#[derive(Debug)] +pub enum CallKind { + Infix, + Normal, + Postfix, + Prefix, } #[derive(Debug)] diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 0ee635c..7161071 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -1,19 +1,58 @@ use crate::syntax::error::ParserError; use crate::syntax::tokens::{Lexer, LocatedToken, Token}; use crate::syntax::*; +use std::collections::HashMap; pub struct Parser<'a> { file_id: usize, lexer: Lexer<'a>, known_tokens: Vec, + precedence_table: HashMap, +} + +pub enum Associativity { + Left, + Right, + None, } impl<'a> Parser<'a> { + /// Create a new parser from the given file index and lexer. + /// + /// The file index will be used for annotating locations and for + /// error messages. If you don't care about either, you can use + /// 0 with no loss of functionality. (Obviously, it will be harder + /// to create quality error messages, but you already knew that.) pub fn new(file_id: usize, lexer: Lexer<'a>) -> Parser<'a> { Parser { file_id, lexer, known_tokens: vec![], + precedence_table: HashMap::new(), + } + } + + /// Add the given operator to our precedence table, at the given + /// precedence level and associativity. 
+    pub fn add_infix_precedence<S: ToString>(
+        &mut self,
+        operator: S,
+        associativity: Associativity,
+        level: u8
+    ) {
+        let actual_associativity = match associativity {
+            Associativity::Left => (level * 2, (level * 2) + 1),
+            Associativity::Right => ((level * 2) + 1, level * 2),
+            Associativity::None => (level * 2, level * 2),
+        };
+
+        self.precedence_table.insert(operator.to_string(), actual_associativity);
+    }
+
+    fn get_precedence(&self, name: &String) -> (u8, u8) {
+        match self.precedence_table.get(name) {
+            None => (19, 20),
+            Some(x) => *x,
         }
     }
 
@@ -549,7 +588,106 @@ impl<'a> Parser<'a> {
     }
 
     pub fn parse_expression(&mut self) -> Result<Expression, ParserError> {
-        self.parse_base_expression()
+        let next = self.next()?.ok_or_else(||
+            self.bad_eof("looking for an expression"))?;
+
+        self.save(next.clone());
+        match next.token {
+            Token::ValueName(x) if x == "match" => self.parse_match_expression(),
+            Token::ValueName(x) if x == "if" => self.parse_if_expression(),
+            _ => self.parse_infix(0),
+        }
+    }
+
+    pub fn parse_match_expression(&mut self) -> Result<Expression, ParserError> {
+        unimplemented!()
+    }
+
+    pub fn parse_if_expression(&mut self) -> Result<Expression, ParserError> {
+        unimplemented!()
+    }
+
+    pub fn parse_infix(&mut self, level: u8) -> Result<Expression, ParserError> {
+        let mut lhs = self.parse_base_expression()?;
+
+        loop {
+            let Some(next) = self.next()?
else { + return Ok(lhs); + }; + + match next.token { + Token::OpenParen => { + self.save(next); + let args = self.parse_call_arguments()?; + lhs = Expression::Call(Box::new(lhs), CallKind::Normal, args); + } + Token::OperatorName(ref n) => { + let (left_pr, right_pr) = self.get_precedence(&n); + + if left_pr < level { + self.save(next); + break; + } + + let rhs = self.parse_infix(right_pr)?; + let name = Name::new(self.to_location(next.span), n); + let opref = Box::new(Expression::Reference(name)); + let args = vec![lhs, rhs]; + + lhs = Expression::Call(opref, CallKind::Infix, args); + } + _ => { + self.save(next); + return Ok(lhs); + } + } + } + + Ok(lhs) + } + + fn parse_call_arguments(&mut self) -> Result, ParserError> { + let next = self.next()?.ok_or_else(|| self.bad_eof( + "looking for open paren for function arguments"))?; + + if !matches!(next.token, Token::OpenParen) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "open paren for call arguments", + }); + } + + let mut args = vec![]; + + loop { + let next = self.next()?.ok_or_else(|| self.bad_eof( + "looking for an expression or close paren in function arguments"))?; + + if matches!(next.token, Token::CloseParen) { + break; + } + + self.save(next); + let argument = self.parse_infix(0)?; + args.push(argument); + + let next = self.next()?.ok_or_else(|| self.bad_eof( + "looking for comma or close paren in function arguments"))?; + match next.token { + Token::Comma => continue, + Token::CloseParen => break, + _ => return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "comma or close paren in function arguments", + }), + } + } + + Ok(args) } pub fn parse_base_expression(&mut self) -> Result { diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index bb3ce5a..8be3eff 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -517,3 +517,255 
@@ fn structure_value() { parse_st("Foo{ foo: 1,, bar: \"foo\", }"), Err(_))); } + +#[test] +fn infix_and_precedence() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.add_infix_precedence("+", parse::Associativity::Left, 6); + result.add_infix_precedence("*", parse::Associativity::Right, 7); + result.parse_expression() + }; + + + assert!(matches!( + parse_ex("0"), + Ok(Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value, .. }))) + if value == 0)); + assert!(matches!( + parse_ex("(0)"), + Ok(Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value, .. }))) + if value == 0)); + assert!(matches!( + parse_ex("((0))"), + Ok(Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value, .. }))) + if value == 0)); + assert!(matches!( + parse_ex("1 + 2"), + Ok(Expression::Call(plus, CallKind::Infix, args)) + if matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })) + ] if *v1 == 1 && *v2 == 2))); + assert!(matches!( + parse_ex("1 + 2 + 3"), + Ok(Expression::Call(plus, CallKind::Infix, args)) + if matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(innerplus, CallKind::Infix, inner_args), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v3 == 3 && + matches!(innerplus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. 
})) + ] if *v1 == 1 && *v2 == 2)))); + assert!(matches!( + parse_ex("1 * 2 * 3"), + Ok(Expression::Call(times, CallKind::Infix, args)) + if matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Call(innertimes, CallKind::Infix, inner_args), + ] if *v1 == 1 && + matches!(innertimes.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v2 == 2 && *v3 == 3)))); + + assert!(matches!( + parse_ex("1 + 2 * 3 + 4"), + Ok(Expression::Call(plus_right, CallKind::Infix, outer_args)) if + matches!(plus_right.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(outer_args.as_slice(), [ + Expression::Call(plus_left, CallKind::Infix, left_args), + Expression::Value(ConstantValue::Integer(_, v4)) + ] if + matches!(v4, IntegerWithBase{ value: 4, .. }) && + matches!(plus_left.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(left_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v1)), + Expression::Call(times, CallKind::Infix, times_args) + ] if + matches!(v1, IntegerWithBase{ value: 1, .. }) && + matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(times_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v2)), + Expression::Value(ConstantValue::Integer(_, v3)) + ] if + matches!(v2, IntegerWithBase{ value: 2, .. }) && + matches!(v3, IntegerWithBase{ value: 3, .. 
})))))); + + assert!(matches!( + parse_ex("1 * 2 + 3 * 4"), + Ok(Expression::Call(plus, CallKind::Infix, outer_args)) if + matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(outer_args.as_slice(), [ + Expression::Call(left_times, CallKind::Infix, left_args), + Expression::Call(right_times, CallKind::Infix, right_args) + ] if + matches!(left_times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(right_times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(left_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v1)), + Expression::Value(ConstantValue::Integer(_, v2)), + ] if + matches!(v1, IntegerWithBase { value: 1, .. }) && + matches!(v2, IntegerWithBase { value: 2, .. })) && + matches!(right_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v3)), + Expression::Value(ConstantValue::Integer(_, v4)), + ] if + matches!(v3, IntegerWithBase { value: 3, .. }) && + matches!(v4, IntegerWithBase { value: 4, .. 
}))))); +} + +#[test] +fn calls() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.add_infix_precedence("+", parse::Associativity::Left, 6); + result.add_infix_precedence("*", parse::Associativity::Right, 7); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("f()"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + args.is_empty())); + assert!(matches!( + parse_ex("f(a)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a"))); + assert!(matches!( + parse_ex("f(a,b)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Reference(a), + Expression::Reference(b), + ] if a.as_printed() == "a" && b.as_printed() == "b"))); + assert!(matches!( + parse_ex("f(a,b,)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Reference(a), + Expression::Reference(b), + ] if a.as_printed() == "a" && b.as_printed() == "b"))); + assert!(matches!( + parse_ex("f(,a,b,)"), + Err(_))); + assert!(matches!( + parse_ex("f(a,,b,)"), + Err(_))); + assert!(matches!( + parse_ex("f(a,b,,)"), + Err(_))); + + assert!(matches!( + parse_ex("f()()"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Call(inner, CallKind::Normal, inner_args) if + matches!(inner.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + inner_args.is_empty()) && + args.is_empty())); + + assert!(matches!( + parse_ex("f() + 1"), + Ok(Expression::Call(plus, CallKind::Infix, args)) if + matches!(plus.as_ref(), 
Expression::Reference(n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(subcall, CallKind::Normal, subargs), + Expression::Value(ConstantValue::Integer(_, v1)) + ] if + matches!(v1, IntegerWithBase{ value: 1, .. }) && + matches!(subcall.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + subargs.is_empty()))); + + assert!(matches!( + parse_ex("f(a + b, c*d)"), + Ok(Expression::Call(eff, CallKind::Normal, args)) if + matches!(eff.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Call(plus, CallKind::Infix, pargs), + Expression::Call(times, CallKind::Infix, targs), + ] if + matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(pargs.as_slice(), [ Expression::Reference(a), Expression::Reference(b) ] if + a.as_printed() == "a" && b.as_printed() == "b") && + matches!(targs.as_slice(), [ Expression::Reference(c), Expression::Reference(d) ] if + c.as_printed() == "c" && d.as_printed() == "d")))); + + assert!(matches!( + parse_ex("f(a + b, c*d,)"), + Ok(Expression::Call(eff, CallKind::Normal, args)) if + matches!(eff.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Call(plus, CallKind::Infix, pargs), + Expression::Call(times, CallKind::Infix, targs), + ] if + matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(pargs.as_slice(), [ Expression::Reference(a), Expression::Reference(b) ] if + a.as_printed() == "a" && b.as_printed() == "b") && + matches!(targs.as_slice(), [ Expression::Reference(c), Expression::Reference(d) ] if + c.as_printed() == "c" && d.as_printed() == "d")))); + + assert!(matches!( + parse_ex("3 + f(1 + 2)"), + Ok(Expression::Call(plus, CallKind::Infix, args)) if + 
matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v3)), + Expression::Call(eff, CallKind::Normal, fargs) + ] if + matches!(v3, IntegerWithBase{ value: 3, .. }) && + matches!(eff.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(fargs.as_slice(), [Expression::Call(p, CallKind::Infix, pargs)] if + matches!(p.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [Expression::Value(v1), Expression::Value(v2)] if + matches!(v1, ConstantValue::Integer(_, IntegerWithBase { value: 1, .. })) && + matches!(v2, ConstantValue::Integer(_, IntegerWithBase { value: 2, .. }))))))); + + assert!(matches!( + parse_ex("(f . g)(1 + 2)"), + Ok(Expression::Call(fg, CallKind::Normal, args)) if + matches!(fg.as_ref(), Expression::Call(dot, CallKind::Infix, fgargs) if + matches!(dot.as_ref(), Expression::Reference(n) if n.as_printed() == ".") && + matches!(fgargs.as_slice(), [Expression::Reference(f), Expression::Reference(g)] if + f.as_printed() == "f" && g.as_printed() == "g")) && + matches!(args.as_slice(), [Expression::Call(plus, CallKind::Infix, pargs)] if + matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [Expression::Value(v1), Expression::Value(v2)] if + matches!(v1, ConstantValue::Integer(_, IntegerWithBase{ value: 1, .. })) && + matches!(v2, ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. 
})))))); + + assert!(matches!( + parse_ex("a + b(2 + 3) * c"), + Ok(Expression::Call(plus, CallKind::Infix, pargs)) if + matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [ + Expression::Reference(a), + Expression::Call(times, CallKind::Infix, targs) + ] if a.as_printed() == "a" && + matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(targs.as_slice(), [ + Expression::Call(b, CallKind::Normal, bargs), + Expression::Reference(c), + ] if c.as_printed() == "c" && + matches!(b.as_ref(), Expression::Reference(n) if n.as_printed() == "b") && + matches!(bargs.as_slice(), [Expression::Call(plus, CallKind::Infix, pargs)] if + matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 3, .. })) + ])))))); +} -- 2.53.0 From f6bf3dd6398e496900c7be35950909ea0ad730cc Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Fri, 3 Oct 2025 22:37:20 -0400 Subject: [PATCH 12/33] Blocks and conditionals. 
--- src/syntax.rs | 13 +- src/syntax/name.rs | 2 +- src/syntax/parse.rs | 341 +++++++++++++++++++++++++++++++++---- src/syntax/parser_tests.rs | 221 ++++++++++++++++++++---- src/syntax/tokens.rs | 50 +++++- 5 files changed, 556 insertions(+), 71 deletions(-) diff --git a/src/syntax.rs b/src/syntax.rs index dfcf197..35ae376 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -110,13 +110,14 @@ pub enum ExportClass { #[derive(Debug)] pub enum Statement { Binding(BindingStmt), + Expression(Expression), } #[derive(Debug)] pub struct BindingStmt { location: Location, mutable: bool, - variable: String, + variable: Name, value: Expression, } @@ -126,7 +127,17 @@ pub enum Expression { Reference(Name), EnumerationValue(Name, Name, Option>), StructureValue(Name, Vec), + Conditional(ConditionalExpr), Call(Box, CallKind, Vec), + Block(Location, Vec), +} + +#[derive(Debug)] +pub struct ConditionalExpr { + location: Location, + test: Box, + consequent: Box, + alternative: Option>, } #[derive(Debug)] diff --git a/src/syntax/name.rs b/src/syntax/name.rs index 5dd90e0..fbfdb5f 100644 --- a/src/syntax/name.rs +++ b/src/syntax/name.rs @@ -1,4 +1,4 @@ -use crate::syntax::{Located, Location}; +use crate::syntax::Location; use std::cmp; use std::fmt; use std::hash; diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 7161071..ddb7c2e 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -7,7 +7,9 @@ pub struct Parser<'a> { file_id: usize, lexer: Lexer<'a>, known_tokens: Vec, - precedence_table: HashMap, + prefix_precedence_table: HashMap, + infix_precedence_table: HashMap, + postfix_precedence_table: HashMap, } pub enum Associativity { @@ -20,7 +22,7 @@ impl<'a> Parser<'a> { /// Create a new parser from the given file index and lexer. /// /// The file index will be used for annotating locations and for - /// error messages. If you don't care about either, you can use + /// error messages. If you don't care about either, you can use /// 0 with no loss of functionality. 
(Obviously, it will be harder /// to create quality error messages, but you already knew that.) pub fn new(file_id: usize, lexer: Lexer<'a>) -> Parser<'a> { @@ -28,7 +30,9 @@ impl<'a> Parser<'a> { file_id, lexer, known_tokens: vec![], - precedence_table: HashMap::new(), + prefix_precedence_table: HashMap::new(), + infix_precedence_table: HashMap::new(), + postfix_precedence_table: HashMap::new(), } } @@ -38,7 +42,7 @@ impl<'a> Parser<'a> { &mut self, operator: S, associativity: Associativity, - level: u8 + level: u8, ) { let actual_associativity = match associativity { Associativity::Left => (level * 2, (level * 2) + 1), @@ -46,11 +50,22 @@ impl<'a> Parser<'a> { Associativity::None => (level * 2, level * 2), }; - self.precedence_table.insert(operator.to_string(), actual_associativity); + self.infix_precedence_table + .insert(operator.to_string(), actual_associativity); + } + + pub fn add_prefix_precedence(&mut self, operator: S, level: u8) { + self.prefix_precedence_table + .insert(operator.to_string(), level * 2); + } + + pub fn add_postfix_precedence(&mut self, operator: S, level: u8) { + self.postfix_precedence_table + .insert(operator.to_string(), level * 2); } fn get_precedence(&self, name: &String) -> (u8, u8) { - match self.precedence_table.get(name) { + match self.infix_precedence_table.get(name) { None => (19, 20), Some(x) => *x, } @@ -588,14 +603,17 @@ impl<'a> Parser<'a> { } pub fn parse_expression(&mut self) -> Result { - let next = self.next()?.ok_or_else(|| - self.bad_eof("looking for an expression"))?; + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for an expression"))?; self.save(next.clone()); match next.token { Token::ValueName(x) if x == "match" => self.parse_match_expression(), - Token::ValueName(x) if x == "if" => self.parse_if_expression(), - _ => self.parse_infix(0), + Token::ValueName(x) if x == "if" => { + Ok(Expression::Conditional(self.parse_if_expression()?)) + } + _ => self.parse_arithmetic(0), } } @@ -603,12 +621,244 @@ impl<'a> Parser<'a> { unimplemented!() } - pub fn parse_if_expression(&mut self) -> Result { - unimplemented!() + pub fn parse_if_expression(&mut self) -> Result { + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for an 'if' to start conditional"))?; + if !matches!(next.token, Token::ValueName(ref x) if x == "if") { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "an 'if' to start a conditional", + }); + } + let start = self.to_location(next.span); + + let test = self.parse_arithmetic(0)?; + let consequent = self.parse_block()?; + + let maybe_else = self.next()?; + let (alternative, location) = match maybe_else { + Some(LocatedToken { + token: Token::ValueName(ref n), + .. + }) if n == "else" => { + let expr = self.parse_block()?; + let location = match expr { + Expression::Block(ref l, _) => l.clone(), + _ => panic!("How did parse_block not return a block?!"), + }; + + (Some(Box::new(expr)), location) + } + + _ => { + let location = match consequent { + Expression::Block(ref l, _) => l.clone(), + _ => panic!("How did parse_block not return a block?!"), + }; + + (None, location) + } + }; + + Ok(ConditionalExpr { + location, + test: Box::new(test), + consequent: Box::new(consequent), + alternative, + }) } - pub fn parse_infix(&mut self, level: u8) -> Result { - let mut lhs = self.parse_base_expression()?; + pub fn parse_block(&mut self) -> Result { + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for open brace to start block"))?; + if !matches!(next.token, Token::OpenBrace) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "an open brace to start a block", + }); + } + let start = self.to_location(next.span); + + let mut statements = vec![]; + let mut ended_with_expr = false; + + while let Some((stmt, terminal)) = self.parse_statement()? { + statements.push(stmt); + if terminal { + ended_with_expr = true; + break; + } + } + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for statement or block close"))?; + if !matches!(next.token, Token::CloseBrace) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "a close brace to end a block", + }); + } + let end = self.to_location(next.span); + + if !ended_with_expr { + let void_name = Name::new(end.clone(), "%prim%void"); + let void_ref = Expression::Reference(void_name); + let void_call = Expression::Call(Box::new(void_ref), CallKind::Normal, vec![]); + statements.push(Statement::Expression(void_call)); + } + + Ok(Expression::Block(start.extend_to(&end), statements)) + } + + pub fn parse_statement(&mut self) -> Result, ParserError> { + loop { + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a statement or close brace"))?; + + match next.token { + Token::CloseBrace => { + self.save(next); + return Ok(None); + } + + Token::ValueName(ref l) if l == "let" => { + self.save(next); + return Ok(Some((Statement::Binding(self.parse_let()?), false))); + } + + _ => { + self.save(next); + let expr = Statement::Expression(self.parse_expression()?); + + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for semicolon or close brace"))?; + + if matches!(next.token, Token::Semi) { + return Ok(Some((expr, false))); + } else { + self.save(next); + return Ok(Some((expr, true))); + } + } + } + } + } + + pub fn parse_let(&mut self) -> Result { + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a let for a binding statement"))?; + if !matches!(next.token, Token::ValueName(ref n) if n == "let") { + self.save(next.clone()); + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "a 'let' to open a binding statement", + }); + } + let start = self.to_location(next.span); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("'mut' or a variable name"))?; + let mutable = matches!(next.token, Token::ValueName(ref n) if n == "mut"); + if !mutable { + self.save(next); + } + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("a variable name"))?; + let variable = match next.token { + Token::ValueName(v) => Name::new(self.to_location(next.span), v), + _ => { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "a variable name for the let binding", + }); + } + }; + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("an '=' after a variable name in a binding"))?; + if !matches!(next.token, Token::OperatorName(ref x) if x == "=") { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "an '=' after the variable name in a let binding", + }); + } + + let value = self.parse_expression()?; + + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for terminal semicolon for let statement"))?; + if !matches!(next.token, Token::Semi) { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "a semicolon to finish a let statement", + }); + } + let end = self.to_location(next.span); + + Ok(BindingStmt { + location: start.extend_to(&end), + mutable, + variable, + value, + }) + } + + pub fn parse_arithmetic(&mut self, level: u8) -> Result { + // start by checking for prefix operators. + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for arithmetic expression"))?; + + let mut lhs = if let Token::OperatorName(ref n) = next.token { + if let Some(pre_prec) = self.prefix_precedence_table.get(n) { + if *pre_prec < level { + self.save(next.clone()); + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "a base expression of a tighter-binding prefix operator", + }); + } + + let rhs = self.parse_arithmetic(*pre_prec)?; + let opname = Name::new(self.to_location(next.span), n); + let op_expr = Expression::Reference(opname); + + Expression::Call(Box::new(op_expr), CallKind::Prefix, vec![rhs]) + } else { + self.save(next); + self.parse_base_expression()? + } + } else { + self.save(next); + self.parse_base_expression()? + }; loop { let Some(next) = self.next()? 
else { @@ -621,7 +871,21 @@ impl<'a> Parser<'a> { let args = self.parse_call_arguments()?; lhs = Expression::Call(Box::new(lhs), CallKind::Normal, args); } + Token::OperatorName(ref n) => { + if let Some(postprec) = self.postfix_precedence_table.get(n) { + if *postprec < level { + self.save(next); + break; + } + + let opname = Name::new(self.to_location(next.span), n); + let op_expr = Expression::Reference(opname); + + lhs = Expression::Call(Box::new(op_expr), CallKind::Postfix, vec![lhs]); + continue; + } + let (left_pr, right_pr) = self.get_precedence(&n); if left_pr < level { @@ -629,13 +893,14 @@ impl<'a> Parser<'a> { break; } - let rhs = self.parse_infix(right_pr)?; + let rhs = self.parse_arithmetic(right_pr)?; let name = Name::new(self.to_location(next.span), n); let opref = Box::new(Expression::Reference(name)); let args = vec![lhs, rhs]; lhs = Expression::Call(opref, CallKind::Infix, args); } + _ => { self.save(next); return Ok(lhs); @@ -647,43 +912,48 @@ impl<'a> Parser<'a> { } fn parse_call_arguments(&mut self) -> Result, ParserError> { - let next = self.next()?.ok_or_else(|| self.bad_eof( - "looking for open paren for function arguments"))?; + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for open paren for function arguments"))?; if !matches!(next.token, Token::OpenParen) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, - span: next.span, - token: next.token, - expected: "open paren for call arguments", + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "open paren for call arguments", }); } - let mut args = vec![]; + let mut args = vec![]; loop { - let next = self.next()?.ok_or_else(|| self.bad_eof( - "looking for an expression or close paren in function arguments"))?; + let next = self.next()?.ok_or_else(|| { + self.bad_eof("looking for an expression or close paren in function arguments") + })?; if matches!(next.token, Token::CloseParen) { break; } self.save(next); - let argument = self.parse_infix(0)?; + let argument = self.parse_arithmetic(0)?; args.push(argument); - let next = self.next()?.ok_or_else(|| self.bad_eof( - "looking for comma or close paren in function arguments"))?; + let next = self.next()?.ok_or_else(|| { + self.bad_eof("looking for comma or close paren in function arguments") + })?; match next.token { Token::Comma => continue, Token::CloseParen => break, - _ => return Err(ParserError::UnexpectedToken { - file_id: self.file_id, - span: next.span, - token: next.token, - expected: "comma or close paren in function arguments", - }), + _ => { + return Err(ParserError::UnexpectedToken { + file_id: self.file_id, + span: next.span, + token: next.token, + expected: "comma or close paren in function arguments", + }); + } } } @@ -700,7 +970,10 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("looking for an expression"))?; match next.token { - Token::OpenBrace => unimplemented!(), + Token::OpenBrace => { + self.save(next); + return self.parse_block(); + } Token::OpenParen => { let inner = self.parse_expression()?; diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index 8be3eff..5960671 100644 --- a/src/syntax/parser_tests.rs +++ 
b/src/syntax/parser_tests.rs @@ -1,5 +1,6 @@ +use crate::syntax::error::ParserError; use crate::syntax::parse::Parser; -use crate::syntax::tokens::Lexer; +use crate::syntax::tokens::{Lexer, Token}; use crate::syntax::*; #[test] @@ -513,9 +514,7 @@ fn structure_value() { f2.as_printed() == "bar" && matches!(v1, Expression::Value(ConstantValue::Integer(_,_))) && matches!(v2, Expression::Value(ConstantValue::String(_,_)))))); - assert!(matches!( - parse_st("Foo{ foo: 1,, bar: \"foo\", }"), - Err(_))); + assert!(matches!(parse_st("Foo{ foo: 1,, bar: \"foo\", }"), Err(_))); } #[test] @@ -528,7 +527,6 @@ fn infix_and_precedence() { result.parse_expression() }; - assert!(matches!( parse_ex("0"), Ok(Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value, .. }))) @@ -659,15 +657,9 @@ fn calls() { Expression::Reference(a), Expression::Reference(b), ] if a.as_printed() == "a" && b.as_printed() == "b"))); - assert!(matches!( - parse_ex("f(,a,b,)"), - Err(_))); - assert!(matches!( - parse_ex("f(a,,b,)"), - Err(_))); - assert!(matches!( - parse_ex("f(a,b,,)"), - Err(_))); + assert!(matches!(parse_ex("f(,a,b,)"), Err(_))); + assert!(matches!(parse_ex("f(a,,b,)"), Err(_))); + assert!(matches!(parse_ex("f(a,b,,)"), Err(_))); assert!(matches!( parse_ex("f()()"), @@ -749,23 +741,190 @@ fn calls() { matches!(v2, ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. 
})))))); assert!(matches!( - parse_ex("a + b(2 + 3) * c"), - Ok(Expression::Call(plus, CallKind::Infix, pargs)) if + parse_ex("a + b(2 + 3) * c"), + Ok(Expression::Call(plus, CallKind::Infix, pargs)) if + matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [ + Expression::Reference(a), + Expression::Call(times, CallKind::Infix, targs) + ] if a.as_printed() == "a" && + matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(targs.as_slice(), [ + Expression::Call(b, CallKind::Normal, bargs), + Expression::Reference(c), + ] if c.as_printed() == "c" && + matches!(b.as_ref(), Expression::Reference(n) if n.as_printed() == "b") && + matches!(bargs.as_slice(), [Expression::Call(plus, CallKind::Infix, pargs)] if matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && matches!(pargs.as_slice(), [ - Expression::Reference(a), - Expression::Call(times, CallKind::Infix, targs) - ] if a.as_printed() == "a" && - matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && - matches!(targs.as_slice(), [ - Expression::Call(b, CallKind::Normal, bargs), - Expression::Reference(c), - ] if c.as_printed() == "c" && - matches!(b.as_ref(), Expression::Reference(n) if n.as_printed() == "b") && - matches!(bargs.as_slice(), [Expression::Call(plus, CallKind::Infix, pargs)] if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && - matches!(pargs.as_slice(), [ - Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. })), - Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 3, .. })) - ])))))); + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 3, .. 
})) + ])))))); +} + +#[test] +fn prefix_and_postfix() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.add_infix_precedence("+", parse::Associativity::Left, 4); + result.add_infix_precedence("*", parse::Associativity::Left, 8); + result.add_prefix_precedence("++", 6); + result.add_postfix_precedence("++", 6); + result.add_prefix_precedence("--", 7); + result.add_postfix_precedence("--", 7); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("++a"), + Ok(Expression::Call(pp, CallKind::Prefix, args)) if + matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a"))); + + assert!(matches!( + parse_ex("a--"), + Ok(Expression::Call(pp, CallKind::Postfix, args)) if + matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a"))); + + // the prefix is weaker than the postfix, so it should be the outside + // operator + assert!(matches!( + parse_ex("++a--"), + Ok(Expression::Call(pp, CallKind::Prefix, args)) if + matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Call(mm, CallKind::Postfix, args)] if + matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a")))); + + // the prefix is stronger than the postfix, so it should be the inside + // operator + assert!(matches!( + parse_ex("--a++"), + Ok(Expression::Call(pp, CallKind::Postfix, args)) if + matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Call(mm, CallKind::Prefix, args)] if + matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(n)] if 
n.as_printed() == "a")))); + + assert!(matches!( + parse_ex("a++ + b"), + Ok(Expression::Call(p, CallKind::Infix, args)) if + matches!(p.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(mm, CallKind::Postfix, args), + Expression::Reference(n) + ] if n.as_printed() == "b" && + matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a")))); + + assert!(matches!( + parse_ex("a + ++ b"), + Ok(Expression::Call(p, CallKind::Infix, args)) if + matches!(p.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Reference(n), + Expression::Call(mm, CallKind::Prefix, args), + ] if n.as_printed() == "a" && + matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "b")))); + + assert!(matches!( + parse_ex("a * ++ b"), + Err(ParserError::UnexpectedToken{ token: Token::OperatorName(pp), .. 
}) + if pp == "++")); +} + +#[test] +fn blocks() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("{}"), + Ok(Expression::Block(_, void)) if + matches!(void.as_slice(), [Statement::Expression(call)] if + matches!(call, Expression::Call(void, CallKind::Normal, vargs) if + matches!(void.as_ref(), Expression::Reference(n) if + n.as_printed() == "%prim%void") && + vargs.is_empty())))); + assert!(matches!( + parse_ex("{ x }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [Statement::Expression(Expression::Reference(n))] if + n.as_printed() == "x"))); + assert!(matches!( + parse_ex("{ x; }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [ + Statement::Expression(Expression::Reference(n)), + Statement::Expression(Expression::Call(primv, CallKind::Normal, vargs)), + ] if n.as_printed() == "x" && vargs.is_empty() && + matches!(primv.as_ref(), Expression::Reference(n) if + n.as_printed() == "%prim%void")))); + assert!(matches!( + parse_ex("{ x; y }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [ + Statement::Expression(Expression::Reference(x)), + Statement::Expression(Expression::Reference(y)), + ] if x.as_printed() == "x" && y.as_printed() == "y"))); +} + +#[test] +fn bindings() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("{ let x = y; }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [Statement::Binding(b), Statement::Expression(_)] if + !b.mutable && + b.variable.as_printed() == "x" && + matches!(b.value, Expression::Reference(ref n) if n.as_printed() == "y")))); +} + +#[test] +fn conditionals() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new(0, lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("if x { y } else { z 
}"), + Ok(Expression::Conditional(cond)) if + matches!(cond.test.as_ref(), Expression::Reference(n) if n.as_printed() == "x") && + matches!(cond.consequent.as_ref(), Expression::Block(_, cs) if + matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(n))] if + n.as_printed() == "y")) && + matches!(cond.alternative.as_ref(), Some(expr) if + matches!(expr.as_ref(), Expression::Block(_, ast) if + matches!(ast.as_slice(), [Statement::Expression(Expression::Reference(n))] if + n.as_printed() == "z"))))); + + assert!(matches!( + parse_ex("if x { y }"), + Ok(Expression::Conditional(cond)) if + matches!(cond.test.as_ref(), Expression::Reference(n) if n.as_printed() == "x") && + matches!(cond.consequent.as_ref(), Expression::Block(_, cs) if + matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(n))] if + n.as_printed() == "y")) && + cond.alternative.is_none())); + + assert!(matches!(parse_ex("if x v { z }"), Err(_))); + + assert!(matches!( + parse_ex("if x + y { z }"), + Ok(Expression::Conditional(cond)) if + matches!(cond.test.as_ref(), Expression::Call(_, CallKind::Infix, _)))); } diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 08f5885..a990409 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -476,10 +476,33 @@ impl<'a> LexerState<'a> { token: Token::OperatorName("-".into()), span: token_start_offset..token_start_offset + 1, })), - Some((end, '>')) => Ok(Some(LocatedToken { - token: Token::Arrow, - span: token_start_offset..end, - })), + Some((end, '>')) => { + let Some((pbloc, peekaboo)) = self.next_char() else { + return Ok(Some(LocatedToken { + token: Token::Arrow, + span: token_start_offset..end, + })); + }; + let is_operator = !peekaboo.is_alphanumeric() + && !peekaboo.is_whitespace() + && !peekaboo.is_control(); + + if is_operator { + self.parse_identifier( + token_start_offset, + format!("->{peekaboo}"), + |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(), + Token::OperatorName, + ) + } 
else { + self.stash_char(pbloc, peekaboo); + + Ok(Some(LocatedToken { + token: Token::Arrow, + span: token_start_offset..end, + })) + } + } Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => self .parse_identifier( token_start_offset, @@ -665,3 +688,22 @@ fn can_separate_pieces() { assert_eq!(Some(Token::ValueName("b".into())), next_token()); assert_eq!(None, next_token()); } + +#[test] +fn arrow_requires_nonop() { + let mut lexer = Lexer::from("->"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); + assert_eq!(Some(Token::Arrow), next_token()); + + let mut lexer = Lexer::from("->*"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); + assert_eq!(Some(Token::OperatorName("->*".into())), next_token()); + + let mut lexer = Lexer::from("->*x"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); + assert_eq!(Some(Token::OperatorName("->*".into())), next_token()); + + let mut lexer = Lexer::from("->x"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); + assert_eq!(Some(Token::Arrow), next_token()); +} -- 2.53.0 From 55df27de98585345ee00624385f2a48c88eab27f Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 11 Oct 2025 13:47:41 -0700 Subject: [PATCH 13/33] Recovered. --- Cargo.lock | 458 ++++++++++++++++++++++++++++++------- Cargo.toml | 5 +- src/syntax.rs | 50 ++++ src/syntax/error.rs | 23 +- src/syntax/location.rs | 42 ++-- src/syntax/parse.rs | 112 ++++----- src/syntax/parser_tests.rs | 32 +-- 7 files changed, 549 insertions(+), 173 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a80b059..6f6a4ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,36 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "getrandom 0.2.16", + "once_cell", + "version_check", + "zerocopy 0.7.35", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ariadne" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f5e3dca4e09a6f340a61a0e9c7b61e030c69fc27bf29d73218f7e5e3b7638f" +dependencies = [ + "concolor", + "unicode-width", + "yansi", +] + [[package]] name = "autocfg" version = "1.5.0" @@ -12,8 +42,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" name = "bang" version = "0.1.0" dependencies = [ - "codespan", - "codespan-reporting", + "ariadne", + "internment", + "memmap2", "proptest", "proptest-derive", "thiserror", @@ -34,6 +65,12 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.9.4" @@ -47,26 +84,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] -name = "codespan" -version = "0.12.0" +name = "concolor" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4b418d52c9206820a56fc1aa28db73d67e346ba8ba6aa90987e8d6becef7e4" +checksum = 
"0b946244a988c390a94667ae0e3958411fa40cc46ea496a929b263d883f5f9c3" dependencies = [ - "codespan-reporting", - "serde", + "bitflags 1.3.2", + "concolor-query", + "is-terminal", ] [[package]] -name = "codespan-reporting" -version = "0.12.0" +name = "concolor-query" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" +checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" dependencies = [ - "serde", - "termcolor", - "unicode-width", + "windows-sys 0.45.0", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.14" @@ -74,7 +129,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -89,6 +144,23 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + 
"libc", + "wasi 0.11.1+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.3.3" @@ -98,7 +170,53 @@ dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasi 0.14.7+wasi-0.2.4", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "internment" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "636d4b0f6a39fd684effe2a73f5310df16a3fa7954c26d36833e98f44d1977a2" +dependencies = [ + "ahash", + "dashmap", + "hashbrown 0.15.5", + "once_cell", +] + +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.59.0", ] [[package]] @@ -119,6 +237,25 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memmap2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +dependencies = [ + "libc", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -134,13 +271,26 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy", + "zerocopy 0.8.27", ] [[package]] @@ -160,7 +310,7 @@ checksum = "2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce" dependencies = [ "bit-set", "bit-vec", - "bitflags", + "bitflags 2.9.4", "lazy_static", "num-traits", "rand", @@ -191,9 +341,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.40" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] @@ -230,7 +380,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom", + "getrandom 0.3.3", ] [[package]] @@ -242,6 +392,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "redox_syscall" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +dependencies = [ + "bitflags 2.9.4", +] + [[package]] name = "regex-syntax" version = "0.8.6" @@ -254,18 +413,18 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags", + "bitflags 2.9.4", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] name = "rusty-fork" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" dependencies = [ "fnv", "quick-error", @@ -274,34 +433,16 @@ dependencies = [ ] [[package]] -name = "serde" -version = "1.0.227" +name = "scopeguard" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245" -dependencies = [ - "serde_core", - "serde_derive", -] +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] -name = "serde_core" -version = "1.0.227" +name = "smallvec" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.227" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "syn" @@ -321,35 +462,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.3.3", "once_cell", "rustix", - "windows-sys", -] - -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", + "windows-sys 0.61.2", ] [[package]] name = "thiserror" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", @@ -370,9 +502,15 @@ checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" [[package]] name = "unicode-width" -version = "0.2.1" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wait-timeout" @@ -383,6 +521,12 @@ dependencies = [ "libc", ] +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + [[package]] name = "wasi" version = "0.14.7+wasi-0.2.4" @@ -401,43 +545,199 @@ dependencies = [ "wit-bindgen", ] -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys", -] - [[package]] name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" -version = "0.61.1" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", 
+] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" 
+version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive 0.7.35", +] + [[package]] name = "zerocopy" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ - "zerocopy-derive", + "zerocopy-derive 0.8.27", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e994877..364d469 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,8 +4,9 @@ version = "0.1.0" edition = "2024" [dependencies] -codespan = "0.12.0" -codespan-reporting = "0.12.0" +ariadne = { version = "0.5.1", features = ["auto-color"] } +internment = { version = "0.8.6", features = ["arc", "arena"] } +memmap2 = "0.9.8" proptest = "1.7.0" proptest-derive = "0.6.0" thiserror = "2.0.12" diff --git a/src/syntax.rs b/src/syntax.rs index 35ae376..4476080 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -6,11 +6,61 @@ mod parse; mod parser_tests; pub mod tokens; +pub use crate::syntax::error::ParserError; +use crate::syntax::parse::Parser; +use crate::syntax::tokens::Lexer; +use internment::ArcIntern; pub use location::{Located, Location}; +use memmap2::Mmap; pub use name::Name; use proptest_derive::Arbitrary; +use std::collections::HashMap; use std::fmt::Debug; use std::ops::Range; +use std::path::{Path, PathBuf}; + +pub struct Universe { + pub files: HashMap, + pub modules: HashMap, +} + +impl Default for Universe { + fn default() -> Self { + Universe { + files: HashMap::new(), + modules: 
HashMap::new(), + } + } +} + +impl Universe { + pub fn add_file>(&mut self, file: P) -> Result<(), ParserError> { + let filename = file.as_ref().to_string_lossy().into_owned(); + + let file_handle = std::fs::File::open(&file) + .map_err(|e| ParserError::OpenError { + file: filename.clone(), + error: e, + })?; + let contents = unsafe { Mmap::map(&file_handle) } + .map_err(|e| ParserError::ReadError { + file: filename.clone(), + error: e, + })?; + let string_contents = std::str::from_utf8(&contents) + .map_err(|e| ParserError::Utf8Error { + file: filename.clone(), + error: e, + })?; + + let lexer = Lexer::from(string_contents); + let mut parser = Parser::new(&file, lexer); + let module = parser.parse_module()?; + self.modules.insert(file.as_ref().to_path_buf(), module); + + Ok(()) + } +} #[derive(Debug)] pub struct Module { diff --git a/src/syntax/error.rs b/src/syntax/error.rs index 35924b9..7700cb1 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -1,23 +1,34 @@ //use codespan_reporting::diagnostic::{Diagnostic, Label}; use crate::syntax::tokens::Token; use std::ops::Range; +use std::path::PathBuf; +use internment::ArcIntern; use thiserror::Error; #[derive(Debug, Error)] pub enum ParserError { - #[error("Lexer error at {file_id}: {error}")] - LexerError { file_id: usize, error: LexerError }, + #[error("Lexer error at {file}: {error}")] + LexerError { file: ArcIntern, error: LexerError }, - #[error("Unacceptable end of file at {file_id} while {place}")] - UnacceptableEof { file_id: usize, place: &'static str }, + #[error("Unacceptable end of file at {file} while {place}")] + UnacceptableEof { file: ArcIntern, place: &'static str }, - #[error("Unexpected token at {file_id}: expected {expected}, saw {token}")] + #[error("Unexpected token at {file}: expected {expected}, saw {token}")] UnexpectedToken { - file_id: usize, + file: ArcIntern, span: Range, token: Token, expected: &'static str, }, + + #[error("Unexpected problem opening file {file}: {error}")] + 
OpenError { file: String, error: std::io::Error }, + + #[error("Unexpected problem reading file {file}: {error}")] + ReadError { file: String, error: std::io::Error }, + + #[error("UTF-8 problem reading file {file}: {error}")] + Utf8Error { file: String, error: std::str::Utf8Error }, } #[derive(Clone, Debug, Error, PartialEq)] diff --git a/src/syntax/location.rs b/src/syntax/location.rs index fe4a352..104d6d8 100644 --- a/src/syntax/location.rs +++ b/src/syntax/location.rs @@ -1,6 +1,8 @@ -use codespan_reporting::diagnostic::Label; +use ariadne::Span; +use internment::ArcIntern; use std::cmp::{max, min}; use std::ops::Range; +use std::path::PathBuf; pub trait Located { fn location(&self) -> Location; @@ -8,19 +10,35 @@ pub trait Located { #[derive(Clone, Debug, Eq, PartialEq)] pub struct Location { - file_id: usize, + file: ArcIntern, span: Range, } +impl Span for Location { + type SourceId = ArcIntern; + + fn source(&self) -> &Self::SourceId { + &self.file + } + + fn start(&self) -> usize { + self.span.start + } + + fn end(&self) -> usize { + self.span.end + } +} + impl Location { - pub fn new(file_id: usize, span: Range) -> Self { - Location { file_id, span } + pub fn new(file: &ArcIntern, span: Range) -> Self { + Location { file: file.clone(), span } } pub fn extend_to(&self, other: &Location) -> Location { - assert_eq!(self.file_id, other.file_id); + assert_eq!(self.file, other.file); Location { - file_id: self.file_id, + file: self.file.clone(), span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end), } } @@ -30,19 +48,11 @@ impl Location { self } - pub fn file_id(&self) -> usize { - self.file_id + pub fn file(&self) -> &str { + self.file.to_str().unwrap_or("") } pub fn span(&self) -> Range { self.span.clone() } - - pub fn primary_label(&self) -> Label { - Label::primary(self.file_id, self.span.clone()) - } - - pub fn secondary_label(&self) -> Label { - Label::secondary(self.file_id, self.span.clone()) - } } diff --git 
a/src/syntax/parse.rs b/src/syntax/parse.rs index ddb7c2e..d6c0d80 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -1,11 +1,12 @@ use crate::syntax::error::ParserError; use crate::syntax::tokens::{Lexer, LocatedToken, Token}; use crate::syntax::*; +use internment::ArcIntern; use std::collections::HashMap; -pub struct Parser<'a> { - file_id: usize, - lexer: Lexer<'a>, +pub struct Parser<'lexer> { + file: ArcIntern, + lexer: Lexer<'lexer>, known_tokens: Vec, prefix_precedence_table: HashMap, infix_precedence_table: HashMap, @@ -18,16 +19,19 @@ pub enum Associativity { None, } -impl<'a> Parser<'a> { +impl<'lexer> Parser<'lexer> { /// Create a new parser from the given file index and lexer. /// /// The file index will be used for annotating locations and for /// error messages. If you don't care about either, you can use /// 0 with no loss of functionality. (Obviously, it will be harder /// to create quality error messages, but you already knew that.) - pub fn new(file_id: usize, lexer: Lexer<'a>) -> Parser<'a> { + pub fn new>( + file: P, + lexer: Lexer<'lexer> + ) -> Parser<'lexer> { Parser { - file_id, + file: ArcIntern::new(file.as_ref().to_path_buf()), lexer, known_tokens: vec![], prefix_precedence_table: HashMap::new(), @@ -82,7 +86,7 @@ impl<'a> Parser<'a> { .next() .transpose() .map_err(|error| ParserError::LexerError { - file_id: self.file_id, + file: self.file.clone(), error, }) } @@ -94,13 +98,13 @@ impl<'a> Parser<'a> { fn bad_eof(&mut self, place: &'static str) -> ParserError { ParserError::UnacceptableEof { - file_id: self.file_id, + file: self.file.clone(), place, } } fn to_location(&self, span: Range) -> Location { - Location::new(self.file_id, span) + Location::new(&self.file, span) } pub fn parse_module(&mut self) -> Result { @@ -161,7 +165,7 @@ impl<'a> Parser<'a> { if !matches!(maybe_paren.token, Token::OpenParen) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_paren.span, 
token: maybe_paren.token, expected: "open parenthesis, following the restrict keyword", @@ -179,7 +183,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("Looking for open paren after restrict"))?; if !matches!(maybe_paren.token, Token::CloseParen) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_paren.span, token: maybe_paren.token, expected: "close parenthesis following type restrictions", @@ -212,7 +216,7 @@ impl<'a> Parser<'a> { weird => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_constructor.span, token: weird, expected: "Constructor name, comma, or close parenthesis in type restriction", @@ -261,7 +265,7 @@ impl<'a> Parser<'a> { } Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "'structure', 'enumeration', or a value identifier", @@ -274,7 +278,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("looking for definition"))?; if !matches!(structure_token.token, Token::ValueName(ref s) if s == "structure") { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: structure_token.span, token: structure_token.token, expected: "the 'structure' keyword", @@ -288,7 +292,7 @@ impl<'a> Parser<'a> { Token::TypeName(str) => str, _ => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: name.span, token: name.token, expected: "a structure name", @@ -301,7 +305,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("the open brace after a structure name"))?; if !matches!(brace.token, Token::OpenBrace) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: brace.span, token: brace.token, expected: "the brace after a structure name", @@ -319,7 +323,7 @@ impl<'a> Parser<'a> { })?; if !matches!(brace.token, Token::CloseBrace) { 
return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: brace.span, token: brace.token, expected: "the brace at the end of a structure definition", @@ -355,7 +359,7 @@ impl<'a> Parser<'a> { })?; if !matches!(maybe_colon.token, Token::Colon) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_colon.span, token: maybe_colon.token, expected: "colon after field name in constructor", @@ -388,7 +392,7 @@ impl<'a> Parser<'a> { return Ok(None); } else { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_name.span, token: maybe_name.token, expected: "a field name", @@ -412,7 +416,7 @@ impl<'a> Parser<'a> { _ => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_colon.span, token: maybe_colon.token, expected: "colon, comma, or close brace after field name", @@ -432,7 +436,7 @@ impl<'a> Parser<'a> { } _ => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: end_token.span, token: end_token.token, expected: "looking for comma or close brace after field definition", @@ -459,7 +463,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("looking for definition"))?; if !matches!(enumeration_token.token, Token::ValueName(ref e) if e == "enumeration") { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: enumeration_token.span, token: enumeration_token.token, expected: "the 'enumeration' keyword", @@ -473,7 +477,7 @@ impl<'a> Parser<'a> { Token::TypeName(str) => str, _ => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: name.span, token: name.token, expected: "an enumeration name", @@ -486,7 +490,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("the open brace after an enumeration name"))?; if !matches!(brace.token, 
Token::OpenBrace) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: brace.span, token: brace.token, expected: "the brace after an enumeration name", @@ -504,7 +508,7 @@ impl<'a> Parser<'a> { })?; if !matches!(brace.token, Token::CloseBrace) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: brace.span, token: brace.token, expected: "the brace at the end of an enumeration definition", @@ -535,7 +539,7 @@ impl<'a> Parser<'a> { _ => { self.save(maybe_name.clone()); return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_name.span, token: maybe_name.token, expected: "variant name (identifier starting with a capital)", @@ -555,7 +559,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("trying to parse a enumeration variant's type"))?; if !matches!(maybe_close.token, Token::CloseParen) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_close.span, token: maybe_close.token, expected: "close paren to end an enumeration variant's type argument", @@ -581,7 +585,7 @@ impl<'a> Parser<'a> { _ => { self.save(ender.clone()); return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: ender.span, token: ender.token, expected: "comma or close brace after enumeration variant", @@ -598,7 +602,7 @@ impl<'a> Parser<'a> { })) } - pub fn parse_function_or_value(&mut self) -> Result { + fn parse_function_or_value(&mut self) -> Result { unimplemented!() } @@ -617,17 +621,17 @@ impl<'a> Parser<'a> { } } - pub fn parse_match_expression(&mut self) -> Result { + fn parse_match_expression(&mut self) -> Result { unimplemented!() } - pub fn parse_if_expression(&mut self) -> Result { + fn parse_if_expression(&mut self) -> Result { let next = self .next()? 
.ok_or_else(|| self.bad_eof("looking for an 'if' to start conditional"))?; if !matches!(next.token, Token::ValueName(ref x) if x == "if") { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "an 'if' to start a conditional", @@ -677,7 +681,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("looking for open brace to start block"))?; if !matches!(next.token, Token::OpenBrace) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "an open brace to start a block", @@ -701,7 +705,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("looking for statement or block close"))?; if !matches!(next.token, Token::CloseBrace) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "a close brace to end a block", @@ -762,7 +766,7 @@ impl<'a> Parser<'a> { if !matches!(next.token, Token::ValueName(ref n) if n == "let") { self.save(next.clone()); return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "a 'let' to open a binding statement", @@ -785,7 +789,7 @@ impl<'a> Parser<'a> { Token::ValueName(v) => Name::new(self.to_location(next.span), v), _ => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "a variable name for the let binding", @@ -798,7 +802,7 @@ impl<'a> Parser<'a> { .ok_or_else(|| self.bad_eof("an '=' after a variable name in a binding"))?; if !matches!(next.token, Token::OperatorName(ref x) if x == "=") { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "an '=' after the variable name in a let binding", @@ -812,7 +816,7 @@ impl<'a> Parser<'a> 
{ .ok_or_else(|| self.bad_eof("looking for terminal semicolon for let statement"))?; if !matches!(next.token, Token::Semi) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "a semicolon to finish a let statement", @@ -839,7 +843,7 @@ impl<'a> Parser<'a> { if *pre_prec < level { self.save(next.clone()); return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "a base expression of a tighter-binding prefix operator", @@ -918,7 +922,7 @@ impl<'a> Parser<'a> { if !matches!(next.token, Token::OpenParen) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "open paren for call arguments", @@ -948,7 +952,7 @@ impl<'a> Parser<'a> { Token::CloseParen => break, _ => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "comma or close paren in function arguments", @@ -985,7 +989,7 @@ impl<'a> Parser<'a> { } else { self.save(hopefully_close.clone()); Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: hopefully_close.span, token: hopefully_close.token, expected: "close paren after expression", @@ -1012,7 +1016,7 @@ impl<'a> Parser<'a> { })?; if !matches!(closer.token, Token::CloseBrace) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: closer.span, token: closer.token, expected: "close brace or comma after field value", @@ -1028,7 +1032,7 @@ impl<'a> Parser<'a> { })?; if !matches!(second_colon.token, Token::Colon) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: second_colon.span, token: second_colon.token, expected: "second colon in enumeration value", @@ -1047,7 +1051,7 @@ impl<'a> 
Parser<'a> { _ => { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: vname.span, token: vname.token, expected: "enumeration value name", @@ -1063,7 +1067,7 @@ impl<'a> Parser<'a> { })?; if !matches!(tok.token, Token::CloseParen) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: tok.span, token: tok.token, expected: "close paren after enum value argument", @@ -1079,7 +1083,7 @@ impl<'a> Parser<'a> { } _ => Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: after_type_name.span, token: after_type_name.token, expected: "colon, open brace, or open paren in constructor", @@ -1096,7 +1100,7 @@ impl<'a> Parser<'a> { _ => { self.save(next.clone()); Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: next.span, token: next.token, expected: "some base expression or an open brace", @@ -1120,7 +1124,7 @@ impl<'a> Parser<'a> { match args.pop() { None => { return Err(ParserError::UnacceptableEof { - file_id: self.file_id, + file: self.file.clone(), place: "parsing function type or type", }); } @@ -1129,7 +1133,7 @@ impl<'a> Parser<'a> { Some(_) => { return Err(ParserError::UnacceptableEof { - file_id: self.file_id, + file: self.file.clone(), place: "looking for '->' in function type", }); } @@ -1147,7 +1151,7 @@ impl<'a> Parser<'a> { let LocatedToken { token, span } = maybe_arrow; Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span, token, expected: "'->' in function type", @@ -1193,7 +1197,7 @@ impl<'a> Parser<'a> { if !matches!(closer.token, Token::CloseParen) { return Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: closer.span, token: closer.token, expected: "close parenthesis to finish a type", @@ -1209,7 +1213,7 @@ impl<'a> Parser<'a> { }); Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: 
self.file.clone(), span, token, expected: "type constructor, type variable, or primitive type", @@ -1239,7 +1243,7 @@ impl<'a> Parser<'a> { _ => { self.save(maybe_constant.clone()); Err(ParserError::UnexpectedToken { - file_id: self.file_id, + file: self.file.clone(), span: maybe_constant.span, token: maybe_constant.token, expected: "constant value", diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index 5960671..0e1bbd5 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -7,7 +7,7 @@ use crate::syntax::*; fn constants() { let parse_constant = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_constant() }; @@ -65,7 +65,7 @@ fn constants() { fn types() { let parse_type = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_type() }; @@ -123,7 +123,7 @@ fn types() { fn type_restrictions() { let parse_tr = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_type_restrictions() }; @@ -195,7 +195,7 @@ fn type_restrictions() { fn field_definition() { let parse_fd = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_field_definition() }; @@ -249,7 +249,7 @@ fn field_definition() { fn structures() { let parse_st = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_structure() }; @@ -325,7 +325,7 @@ fn structures() { fn enum_variant() { let parse_ev = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_enum_variant() }; @@ -382,7 +382,7 @@ fn enum_variant() { fn enumerations() { let parse_en = |str| { let 
lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_enumeration() }; @@ -418,7 +418,7 @@ fn enumerations() { fn expressions() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_expression() }; @@ -451,7 +451,7 @@ fn expressions() { fn enumeration_values() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_expression() }; @@ -472,7 +472,7 @@ fn enumeration_values() { fn structure_value() { let parse_st = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_expression() }; @@ -521,7 +521,7 @@ fn structure_value() { fn infix_and_precedence() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.add_infix_precedence("+", parse::Associativity::Left, 6); result.add_infix_precedence("*", parse::Associativity::Right, 7); result.parse_expression() @@ -625,7 +625,7 @@ fn infix_and_precedence() { fn calls() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.add_infix_precedence("+", parse::Associativity::Left, 6); result.add_infix_precedence("*", parse::Associativity::Right, 7); result.parse_expression() @@ -766,7 +766,7 @@ fn calls() { fn prefix_and_postfix() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.add_infix_precedence("+", parse::Associativity::Left, 4); result.add_infix_precedence("*", parse::Associativity::Left, 8); result.add_prefix_precedence("++", 6); @@ -840,7 +840,7 @@ fn prefix_and_postfix() { fn 
blocks() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_expression() }; @@ -879,7 +879,7 @@ fn blocks() { fn bindings() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_expression() }; @@ -896,7 +896,7 @@ fn bindings() { fn conditionals() { let parse_ex = |str| { let lexer = Lexer::from(str); - let mut result = Parser::new(0, lexer); + let mut result = Parser::new("test", lexer); result.parse_expression() }; -- 2.53.0 From 9ea6868938b10fa717e988df5a1dd5a0c183875f Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Sat, 11 Oct 2025 14:46:02 -0700 Subject: [PATCH 14/33] Shifting and naming. --- src/syntax.rs | 271 +------------------------------------ src/syntax/ast.rs | 228 +++++++++++++++++++++++++++++++ src/syntax/error.rs | 17 ++- src/syntax/location.rs | 5 +- src/syntax/name.rs | 4 + src/syntax/parse.rs | 111 ++++++++++++--- src/syntax/parser_tests.rs | 165 +++++++++++----------- src/syntax/tokens.rs | 6 +- src/syntax/universe.rs | 48 +++++++ 9 files changed, 478 insertions(+), 377 deletions(-) create mode 100644 src/syntax/ast.rs create mode 100644 src/syntax/universe.rs diff --git a/src/syntax.rs b/src/syntax.rs index 4476080..d15f8f9 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,277 +1,14 @@ +mod ast; mod error; mod location; mod name; mod parse; #[cfg(test)] mod parser_tests; -pub mod tokens; +mod tokens; +mod universe; pub use crate::syntax::error::ParserError; -use crate::syntax::parse::Parser; -use crate::syntax::tokens::Lexer; -use internment::ArcIntern; +pub use ast::*; pub use location::{Located, Location}; -use memmap2::Mmap; pub use name::Name; -use proptest_derive::Arbitrary; -use std::collections::HashMap; -use std::fmt::Debug; -use std::ops::Range; -use std::path::{Path, PathBuf}; - -pub struct Universe { - pub files: HashMap, - 
pub modules: HashMap, -} - -impl Default for Universe { - fn default() -> Self { - Universe { - files: HashMap::new(), - modules: HashMap::new(), - } - } -} - -impl Universe { - pub fn add_file>(&mut self, file: P) -> Result<(), ParserError> { - let filename = file.as_ref().to_string_lossy().into_owned(); - - let file_handle = std::fs::File::open(&file) - .map_err(|e| ParserError::OpenError { - file: filename.clone(), - error: e, - })?; - let contents = unsafe { Mmap::map(&file_handle) } - .map_err(|e| ParserError::ReadError { - file: filename.clone(), - error: e, - })?; - let string_contents = std::str::from_utf8(&contents) - .map_err(|e| ParserError::Utf8Error { - file: filename.clone(), - error: e, - })?; - - let lexer = Lexer::from(string_contents); - let mut parser = Parser::new(&file, lexer); - let module = parser.parse_module()?; - self.modules.insert(file.as_ref().to_path_buf(), module); - - Ok(()) - } -} - -#[derive(Debug)] -pub struct Module { - definitions: Vec, -} - -#[derive(Debug)] -pub struct Definition { - location: Location, - export: ExportClass, - type_restrictions: TypeRestrictions, - definition: Def, -} - -impl Located for Definition { - fn location(&self) -> Location { - self.location.clone() - } -} - -#[derive(Debug)] -pub enum Def { - Enumeration(EnumerationDef), - Structure(StructureDef), - Function(FunctionDef), - Value(ValueDef), -} - -impl Located for Def { - fn location(&self) -> Location { - match self { - Def::Enumeration(def) => def.location.clone(), - Def::Structure(def) => def.location.clone(), - Def::Function(def) => def.location.clone(), - Def::Value(def) => def.location.clone(), - } - } -} - -#[derive(Debug)] -pub struct EnumerationDef { - name: String, - location: Location, - variants: Vec, -} - -#[derive(Debug)] -pub struct EnumerationVariant { - location: Location, - name: String, - argument: Option, -} - -#[derive(Debug)] -pub struct StructureDef { - name: String, - location: Location, - fields: Vec, -} - -#[derive(Debug)] 
-pub struct StructureField { - location: Location, - export: ExportClass, - name: String, - field_type: Option, -} - -#[derive(Debug)] -pub struct FunctionDef { - name: String, - location: Location, - arguments: Vec, - return_type: Option, - body: Vec, -} - -#[derive(Debug)] -pub struct FunctionArg { - name: String, - arg_type: Option, -} - -#[derive(Debug)] -pub struct ValueDef { - name: String, - location: Location, - value: Expression, -} - -#[derive(Debug)] -pub enum ExportClass { - Public, - Private, -} - -#[derive(Debug)] -pub enum Statement { - Binding(BindingStmt), - Expression(Expression), -} - -#[derive(Debug)] -pub struct BindingStmt { - location: Location, - mutable: bool, - variable: Name, - value: Expression, -} - -#[derive(Debug)] -pub enum Expression { - Value(ConstantValue), - Reference(Name), - EnumerationValue(Name, Name, Option>), - StructureValue(Name, Vec), - Conditional(ConditionalExpr), - Call(Box, CallKind, Vec), - Block(Location, Vec), -} - -#[derive(Debug)] -pub struct ConditionalExpr { - location: Location, - test: Box, - consequent: Box, - alternative: Option>, -} - -#[derive(Debug)] -pub enum CallKind { - Infix, - Normal, - Postfix, - Prefix, -} - -#[derive(Debug)] -pub struct FieldValue { - field: Name, - value: Expression, -} - -#[derive(Debug)] -pub struct TypeRestrictions { - restrictions: Vec, -} - -impl TypeRestrictions { - fn empty() -> Self { - TypeRestrictions { - restrictions: vec![], - } - } -} - -#[derive(Debug)] -pub struct TypeRestriction { - constructor: Type, - arguments: Vec, -} - -#[derive(Debug)] -pub enum Type { - Constructor(Location, String), - Variable(Location, String), - Primitive(Location, String), - Application(Box, Vec), - Function(Vec, Box), -} - -impl Located for Type { - fn location(&self) -> Location { - match self { - Type::Constructor(l, _) => l.clone(), - Type::Variable(l, _) => l.clone(), - Type::Primitive(l, _) => l.clone(), - Type::Application(t1, ts) => { - let mut result = t1.location(); - if let 
Some(last) = ts.last() { - result = result.extend_to(&last.location()); - } - result - } - Type::Function(args, ret) => { - if let Some(first) = args.first() { - first.location().extend_to(&ret.location()) - } else { - ret.location() - } - } - } - } -} - -#[derive(Debug)] -pub enum ConstantValue { - Integer(Location, IntegerWithBase), - Character(Location, char), - String(Location, String), -} - -#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] -pub struct IntegerWithBase { - #[proptest(strategy = "proptest::prop_oneof![ \ - proptest::strategy::Just(None), \ - proptest::strategy::Just(Some(2)), \ - proptest::strategy::Just(Some(8)), \ - proptest::strategy::Just(Some(10)), \ - proptest::strategy::Just(Some(16)), \ - ]")] - base: Option, - value: u64, -} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs new file mode 100644 index 0000000..a25fbae --- /dev/null +++ b/src/syntax/ast.rs @@ -0,0 +1,228 @@ +use crate::syntax::location::{Located, Location}; +use crate::syntax::name::Name; +use proptest_derive::Arbitrary; + +#[derive(Debug)] +pub struct Module { + pub definitions: Vec, +} + +#[derive(Debug)] +pub struct Definition { + pub location: Location, + pub export: ExportClass, + pub type_restrictions: TypeRestrictions, + pub definition: Def, +} + +impl Located for Definition { + fn location(&self) -> Location { + self.location.clone() + } +} + +#[derive(Debug)] +pub enum Def { + Enumeration(EnumerationDef), + Structure(StructureDef), + Function(FunctionDef), + Value(ValueDef), +} + +impl Located for Def { + fn location(&self) -> Location { + match self { + Def::Enumeration(def) => def.location.clone(), + Def::Structure(def) => def.location.clone(), + Def::Function(def) => def.location.clone(), + Def::Value(def) => def.location.clone(), + } + } +} + +#[derive(Debug)] +pub struct EnumerationDef { + pub name: Name, + pub location: Location, + pub variants: Vec, +} + +#[derive(Debug)] +pub struct EnumerationVariant { + pub location: Location, + pub name: Name, + pub 
argument: Option, +} + +#[derive(Debug)] +pub struct StructureDef { + pub name: Name, + pub location: Location, + pub fields: Vec, +} + +#[derive(Debug)] +pub struct StructureField { + pub location: Location, + pub export: ExportClass, + pub name: Name, + pub field_type: Option, +} + +#[derive(Debug)] +pub struct FunctionDef { + pub name: Name, + pub location: Location, + pub arguments: Vec, + pub return_type: Option, + pub body: Vec, +} + +#[derive(Debug)] +pub struct FunctionArg { + pub name: Name, + pub arg_type: Option, +} + +#[derive(Debug)] +pub struct ValueDef { + pub name: Name, + pub location: Location, + pub value: Expression, +} + +#[derive(Debug)] +pub enum ExportClass { + Public, + Private, +} + +#[derive(Debug)] +pub enum Statement { + Binding(BindingStmt), + Expression(Expression), +} + +#[derive(Debug)] +pub struct BindingStmt { + pub location: Location, + pub mutable: bool, + pub variable: Name, + pub value: Expression, +} + +#[derive(Debug)] +pub enum Expression { + Value(ConstantValue), + Reference(Name), + EnumerationValue(Name, Name, Option>), + StructureValue(Name, Vec), + Conditional(ConditionalExpr), + Match(MatchExpr), + Call(Box, CallKind, Vec), + Block(Location, Vec), +} + +#[derive(Debug)] +pub struct ConditionalExpr { + pub location: Location, + pub test: Box, + pub consequent: Box, + pub alternative: Option>, +} + +#[derive(Debug)] +pub struct MatchExpr { + pub location: Location, + pub value: Box, + pub cases: Vec, +} + +#[derive(Debug)] +pub struct MatchCase {} + +#[derive(Debug)] +pub enum CallKind { + Infix, + Normal, + Postfix, + Prefix, +} + +#[derive(Debug)] +pub struct FieldValue { + pub field: Name, + pub value: Expression, +} + +#[derive(Debug)] +pub struct TypeRestrictions { + pub restrictions: Vec, +} + +impl TypeRestrictions { + pub fn empty() -> Self { + TypeRestrictions { + restrictions: vec![], + } + } +} + +#[derive(Debug)] +pub struct TypeRestriction { + pub constructor: Type, + pub arguments: Vec, +} + 
+#[derive(Debug)] +pub enum Type { + Constructor(Location, Name), + Variable(Location, Name), + Primitive(Location, Name), + Application(Box, Vec), + Function(Vec, Box), +} + +impl Located for Type { + fn location(&self) -> Location { + match self { + Type::Constructor(l, _) => l.clone(), + Type::Variable(l, _) => l.clone(), + Type::Primitive(l, _) => l.clone(), + Type::Application(t1, ts) => { + let mut result = t1.location(); + if let Some(last) = ts.last() { + result = result.extend_to(&last.location()); + } + result + } + Type::Function(args, ret) => { + if let Some(first) = args.first() { + first.location().extend_to(&ret.location()) + } else { + ret.location() + } + } + } + } +} + +#[derive(Debug)] +pub enum ConstantValue { + Integer(Location, IntegerWithBase), + Character(Location, char), + String(Location, String), +} + +#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] +pub struct IntegerWithBase { + #[proptest(strategy = "proptest::prop_oneof![ \ + proptest::strategy::Just(None), \ + proptest::strategy::Just(Some(2)), \ + proptest::strategy::Just(Some(8)), \ + proptest::strategy::Just(Some(10)), \ + proptest::strategy::Just(Some(16)), \ + ]")] + pub base: Option, + pub value: u64, +} diff --git a/src/syntax/error.rs b/src/syntax/error.rs index 7700cb1..33e3b42 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -1,17 +1,23 @@ //use codespan_reporting::diagnostic::{Diagnostic, Label}; use crate::syntax::tokens::Token; +use internment::ArcIntern; use std::ops::Range; use std::path::PathBuf; -use internment::ArcIntern; use thiserror::Error; #[derive(Debug, Error)] pub enum ParserError { #[error("Lexer error at {file}: {error}")] - LexerError { file: ArcIntern, error: LexerError }, + LexerError { + file: ArcIntern, + error: LexerError, + }, #[error("Unacceptable end of file at {file} while {place}")] - UnacceptableEof { file: ArcIntern, place: &'static str }, + UnacceptableEof { + file: ArcIntern, + place: &'static str, + }, #[error("Unexpected 
token at {file}: expected {expected}, saw {token}")] UnexpectedToken { @@ -28,7 +34,10 @@ pub enum ParserError { ReadError { file: String, error: std::io::Error }, #[error("UTF-8 problem reading file {file}: {error}")] - Utf8Error { file: String, error: std::str::Utf8Error }, + Utf8Error { + file: String, + error: std::str::Utf8Error, + }, } #[derive(Clone, Debug, Error, PartialEq)] diff --git a/src/syntax/location.rs b/src/syntax/location.rs index 104d6d8..cd8bc78 100644 --- a/src/syntax/location.rs +++ b/src/syntax/location.rs @@ -32,7 +32,10 @@ impl Span for Location { impl Location { pub fn new(file: &ArcIntern, span: Range) -> Self { - Location { file: file.clone(), span } + Location { + file: file.clone(), + span, + } } pub fn extend_to(&self, other: &Location) -> Location { diff --git a/src/syntax/name.rs b/src/syntax/name.rs index fbfdb5f..f8069ad 100644 --- a/src/syntax/name.rs +++ b/src/syntax/name.rs @@ -57,4 +57,8 @@ impl Name { pub fn as_printed(&self) -> &str { self.printable.as_str() } + + pub fn bind_to(&mut self, other: &Name) { + self.identifier = other.identifier; + } } diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index d6c0d80..662cfe3 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -3,6 +3,8 @@ use crate::syntax::tokens::{Lexer, LocatedToken, Token}; use crate::syntax::*; use internment::ArcIntern; use std::collections::HashMap; +use std::ops::Range; +use std::path::{Path, PathBuf}; pub struct Parser<'lexer> { file: ArcIntern, @@ -26,10 +28,7 @@ impl<'lexer> Parser<'lexer> { /// error messages. If you don't care about either, you can use /// 0 with no loss of functionality. (Obviously, it will be harder /// to create quality error messages, but you already knew that.) 
- pub fn new>( - file: P, - lexer: Lexer<'lexer> - ) -> Parser<'lexer> { + pub fn new>(file: P, lexer: Lexer<'lexer>) -> Parser<'lexer> { Parser { file: ArcIntern::new(file.as_ref().to_path_buf()), lexer, @@ -200,10 +199,12 @@ impl<'lexer> Parser<'lexer> { let constructor = match maybe_constructor.token { Token::TypeName(str) => { - Type::Constructor(self.to_location(maybe_constructor.span), str) + let name = Name::new(self.to_location(maybe_constructor.span.clone()), str); + Type::Constructor(self.to_location(maybe_constructor.span), name) } Token::PrimitiveTypeName(str) => { - Type::Primitive(self.to_location(maybe_constructor.span), str) + let name = Name::new(self.to_location(maybe_constructor.span.clone()), str); + Type::Primitive(self.to_location(maybe_constructor.span), name) } token @ Token::CloseParen | token @ Token::Comma => { @@ -289,7 +290,7 @@ impl<'lexer> Parser<'lexer> { .next()? .ok_or_else(|| self.bad_eof("looking for structure name"))?; let structure_name = match name.token { - Token::TypeName(str) => str, + Token::TypeName(str) => Name::new(self.to_location(name.span), str), _ => { return Err(ParserError::UnexpectedToken { file: self.file.clone(), @@ -385,7 +386,7 @@ impl<'lexer> Parser<'lexer> { .ok_or_else(|| self.bad_eof("parsing field definition"))?; let name = match maybe_name.token { - Token::ValueName(x) => x, + Token::ValueName(x) => Name::new(self.to_location(maybe_name.span.clone()), x), _ => { self.save(maybe_name.clone()); if matches!(export, ExportClass::Private) { @@ -474,7 +475,7 @@ impl<'lexer> Parser<'lexer> { .next()? .ok_or_else(|| self.bad_eof("looking for enumeration name"))?; let enumeration_name = match name.token { - Token::TypeName(str) => str, + Token::TypeName(str) => Name::new(self.to_location(name.span), str), _ => { return Err(ParserError::UnexpectedToken { file: self.file.clone(), @@ -531,7 +532,7 @@ impl<'lexer> Parser<'lexer> { .next()? 
.ok_or_else(|| self.bad_eof("looking for enumeration name"))?; let name = match maybe_name.token { - Token::TypeName(x) => x, + Token::TypeName(x) => Name::new(self.to_location(maybe_name.span.clone()), x), Token::CloseBrace => { self.save(maybe_name); return Ok(None); @@ -613,7 +614,9 @@ impl<'lexer> Parser<'lexer> { self.save(next.clone()); match next.token { - Token::ValueName(x) if x == "match" => self.parse_match_expression(), + Token::ValueName(x) if x == "match" => { + Ok(Expression::Match(self.parse_match_expression()?)) + } Token::ValueName(x) if x == "if" => { Ok(Expression::Conditional(self.parse_if_expression()?)) } @@ -621,7 +624,64 @@ impl<'lexer> Parser<'lexer> { } } - fn parse_match_expression(&mut self) -> Result { + fn parse_match_expression(&mut self) -> Result { + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a 'match' to open a pattern match"))?; + + if !matches!(next.token, Token::ValueName(ref x) if x == "match") { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "an 'match' to start a pattern match", + }); + } + let start = self.to_location(next.span); + + let value = Box::new(self.parse_arithmetic(0)?); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for an open brace after 'match'"))?; + if !matches!(next.token, Token::OpenBrace) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "an open brace after the match expression", + }); + } + + let mut cases = vec![]; + + while let Some(case) = self.parse_match_case()? { + cases.push(case); + } + + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for an open brace after 'match'"))?; + if !matches!(next.token, Token::CloseBrace) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "a close brace to end a match expression", + }); + } + let end = self.to_location(next.span); + + let location = start.extend_to(&end); + + Ok(MatchExpr { + location, + value, + cases, + }) + } + + fn parse_match_case(&mut self) -> Result, ParserError> { unimplemented!() } @@ -668,7 +728,7 @@ impl<'lexer> Parser<'lexer> { }; Ok(ConditionalExpr { - location, + location: start.extend_to(&location), test: Box::new(test), consequent: Box::new(consequent), alternative, @@ -1164,8 +1224,14 @@ impl<'lexer> Parser<'lexer> { self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; let constructor = match token { - Token::TypeName(x) => Type::Constructor(self.to_location(span), x), - Token::PrimitiveTypeName(x) => Type::Primitive(self.to_location(span), x), + Token::TypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Type::Constructor(self.to_location(span), name) + } + Token::PrimitiveTypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Type::Primitive(self.to_location(span), name) + } _ => { self.save(LocatedToken { token, span }); return self.parse_base_type(); @@ -1186,9 +1252,18 @@ impl<'lexer> Parser<'lexer> { self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; match token { - Token::TypeName(x) => Ok(Type::Constructor(self.to_location(span), x)), - Token::PrimitiveTypeName(x) => Ok(Type::Primitive(self.to_location(span), x)), - Token::ValueName(x) => Ok(Type::Variable(self.to_location(span), x)), + Token::TypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Ok(Type::Constructor(self.to_location(span), name)) + } + Token::PrimitiveTypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + 
Ok(Type::Primitive(self.to_location(span), name)) + } + Token::ValueName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Ok(Type::Variable(self.to_location(span), name)) + } Token::OpenParen => { let t = self.parse_type()?; let closer = self diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index 0e1bbd5..aec9810 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -72,50 +72,47 @@ fn types() { assert!(matches!( parse_type("Cons"), Ok(Type::Application(cons, empty)) if - matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && + matches!(cons.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && empty.is_empty() )); assert!(matches!( parse_type("cons"), - Ok(Type::Variable(_, c)) if c == "cons" + Ok(Type::Variable(_, c)) if c.as_printed() == "cons" )); assert!(matches!( parse_type("Cons a b"), Ok(Type::Application(a, b)) - if matches!(a.as_ref(), Type::Constructor(_, c) if c == "Cons") && + if matches!(a.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] - if b1 == "a" && b2 == "b") + if b1.as_printed() == "a" && b2.as_printed() == "b") )); assert!(matches!( parse_type("a -> z"), Ok(Type::Function(a, z)) - if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") && - matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1.as_printed() == "a") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") )); - println!("-------------"); - println!("{:?}", parse_type("(a -> z)")); - println!("-------------"); assert!(matches!( parse_type("(a -> z)"), Ok(Type::Function(a, z)) - if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1 == "a") && - matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1.as_printed() == "a") && + matches!(z.as_ref(), Type::Variable(_, 
z1) if z1.as_printed() == "z") )); assert!(matches!( parse_type("a b -> z"), Ok(Type::Function(a, z)) if matches!(a.as_slice(), [Type::Variable(_, a1), Type::Variable(_, b1)] - if a1 == "a" && b1 == "b") && - matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + if a1.as_printed() == "a" && b1.as_printed() == "b") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") )); assert!(matches!( parse_type("Cons a b -> z"), Ok(Type::Function(a, z)) if matches!(a.as_slice(), [Type::Application(cons, appargs)] - if matches!(cons.as_ref(), Type::Constructor(_, c) if c == "Cons") && + if matches!(cons.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && matches!(appargs.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] - if b1 == "a" && b2 == "b")) && - matches!(z.as_ref(), Type::Variable(_, z1) if z1 == "z") + if b1.as_printed() == "a" && b2.as_printed() == "b")) && + matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") )); } @@ -138,10 +135,10 @@ fn type_restrictions() { matches!(&restrictions[0], TypeRestriction { constructor, arguments, - } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && arguments.len() == 2 && - matches!(&arguments[0], Type::Variable(_, x) if x == "a") && - matches!(&arguments[1], Type::Variable(_, x) if x == "b")))); + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); assert!(matches!( parse_tr("restrict(Cons a b,)"), @@ -149,10 +146,10 @@ fn type_restrictions() { matches!(&restrictions[0], TypeRestriction { constructor, arguments, - } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && arguments.len() == 2 && - matches!(&arguments[0], Type::Variable(_, x) if x == "a") && - 
matches!(&arguments[1], Type::Variable(_, x) if x == "b")))); + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); assert!(matches!(parse_tr("restrict(,Cons a b,)"), Err(_))); @@ -162,16 +159,16 @@ fn type_restrictions() { matches!(&restrictions[0], TypeRestriction { constructor, arguments, - } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && arguments.len() == 2 && - matches!(&arguments[0], Type::Variable(_, x) if x == "a") && - matches!(&arguments[1], Type::Variable(_, x) if x == "b")) && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")) && matches!(&restrictions[1], TypeRestriction { constructor, arguments, - } if matches!(constructor, Type::Constructor(_, x) if x == "Monad") && + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Monad") && arguments.len() == 1 && - matches!(&arguments[0], Type::Variable(_, x) if x == "m")))); + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "m")))); assert!(matches!( parse_tr("restrict(Cons a b, Monad m,)"), @@ -179,16 +176,16 @@ fn type_restrictions() { matches!(&restrictions[0], TypeRestriction { constructor, arguments, - } if matches!(constructor, Type::Constructor(_, x) if x == "Cons") && + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && arguments.len() == 2 && - matches!(&arguments[0], Type::Variable(_, x) if x == "a") && - matches!(&arguments[1], Type::Variable(_, x) if x == "b")) && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")) && matches!(&restrictions[1], TypeRestriction { constructor, arguments, - } if matches!(constructor, 
Type::Constructor(_, x) if x == "Monad") && + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Monad") && arguments.len() == 1 && - matches!(&arguments[0], Type::Variable(_, x) if x == "m")))); + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "m")))); } #[test] @@ -203,46 +200,46 @@ fn field_definition() { assert!(matches!( parse_fd("foo,"), Ok(Some(StructureField{ name, export: ExportClass::Private, field_type: None, .. })) - if name == "foo" + if name.as_printed() == "foo" )); assert!(matches!( parse_fd("foo}"), Ok(Some(StructureField{ name, export: ExportClass::Private, field_type: None, .. })) - if name == "foo" + if name.as_printed() == "foo" )); assert!(matches!( parse_fd("foo: Word8,"), Ok(Some(StructureField{ name, field_type, .. })) - if name == "foo" && + if name.as_printed() == "foo" && matches!(&field_type, Some(Type::Application(c, args)) - if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Word8") && + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Word8") && args.is_empty()))); assert!(matches!( parse_fd("foo: Cons a b,"), Ok(Some(StructureField{ name, field_type, .. })) - if name == "foo" && + if name.as_printed() == "foo" && matches!(&field_type, Some(Type::Application(c, args)) - if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Cons") && + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && matches!(&args.as_slice(), &[Type::Variable(_, v1), Type::Variable(_, v2)] - if v1 == "a" && v2 == "b")))); + if v1.as_printed() == "a" && v2.as_printed() == "b")))); assert!(matches!( parse_fd("foo: a -> b,"), Ok(Some(StructureField{ name, field_type, .. 
})) - if name == "foo" && + if name.as_printed() == "foo" && matches!(&field_type, Some(Type::Function(args, ret)) - if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a == "a") && - matches!(ret.as_ref(), Type::Variable(_, b) if b == "b")))); + if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a.as_printed() == "a") && + matches!(ret.as_ref(), Type::Variable(_, b) if b.as_printed() == "b")))); assert!(matches!( parse_fd("export foo: a -> b,"), Ok(Some(StructureField{ name, export: ExportClass::Public, field_type, .. })) - if name == "foo" && + if name.as_printed() == "foo" && matches!(&field_type, Some(Type::Function(args, ret)) - if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a == "a") && - matches!(ret.as_ref(), Type::Variable(_, b) if b == "b")))); + if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a.as_printed() == "a") && + matches!(ret.as_ref(), Type::Variable(_, b) if b.as_printed() == "b")))); } #[test] @@ -260,65 +257,65 @@ fn structures() { assert!(matches!( parse_st("structure Foo {}"), Ok(StructureDef { name, fields, .. }) - if name == "Foo" && fields.is_empty())); + if name.as_printed() == "Foo" && fields.is_empty())); assert!(matches!( parse_st("structure Foo { bar }"), Ok(StructureDef { name, fields, .. }) - if name == "Foo" && + if name.as_printed() == "Foo" && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }] - if name == "bar" && matches!(field_type, None)))); + if name.as_printed() == "bar" && matches!(field_type, None)))); assert!(matches!( parse_st("structure Foo { bar: Word8 }"), Ok(StructureDef { name, fields, .. }) - if name == "Foo" && + if name.as_printed() == "Foo" && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. 
}] - if name == "bar" && + if name.as_printed() == "bar" && matches!(field_type, Some(Type::Application(c, args)) - if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Word8") && + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Word8") && args.is_empty())))); assert!(matches!( parse_st("structure Foo { bar: Word8, goo }"), Ok(StructureDef { name, fields, .. }) - if name == "Foo" && + if name.as_printed() == "Foo" && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }, StructureField { name: ref name2, field_type: None, .. }] - if name == "bar" && - name2 == "goo" && + if name.as_printed() == "bar" && + name2.as_printed() == "goo" && matches!(field_type, Some(Type::Application(c, args)) - if matches!(c.as_ref(), Type::Constructor(_, c) if c == "Word8") && + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Word8") && args.is_empty())))); assert!(matches!( parse_st("structure Foo { bar: b c -> a, goo }"), Ok(StructureDef { name, fields, .. }) - if name == "Foo" && + if name.as_printed() == "Foo" && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }, StructureField { name: ref name2, field_type: None, .. }] - if name == "bar" && - name2 == "goo" && + if name.as_printed() == "bar" && + name2.as_printed() == "goo" && matches!(field_type, Some(Type::Function(args, ret)) if matches!(&args.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)] - if b == "b" && c == "c") && - matches!(ret.as_ref(), Type::Variable(_, a) if a == "a"))))); + if b.as_printed() == "b" && c.as_printed() == "c") && + matches!(ret.as_ref(), Type::Variable(_, a) if a.as_printed() == "a"))))); assert!(matches!( parse_st("structure Foo { bar: b c -> a, goo, }"), Ok(StructureDef { name, fields, .. }) - if name == "Foo" && + if name.as_printed() == "Foo" && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }, StructureField { name: ref name2, field_type: None, .. 
}] - if name == "bar" && - name2 == "goo" && + if name.as_printed() == "bar" && + name2.as_printed() == "goo" && matches!(field_type, Some(Type::Function(args, ret)) if matches!(&args.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)] - if b == "b" && c == "c") && - matches!(ret.as_ref(), Type::Variable(_, a) if a == "a"))))); + if b.as_printed() == "b" && c.as_printed() == "c") && + matches!(ret.as_ref(), Type::Variable(_, a) if a.as_printed() == "a"))))); } #[test] @@ -339,43 +336,43 @@ fn enum_variant() { assert!(matches!( parse_ev("Cons,"), Ok(Some(EnumerationVariant { name, argument, .. })) - if name == "Cons" && argument.is_none())); + if name.as_printed() == "Cons" && argument.is_none())); assert!(matches!( parse_ev("Cons }"), Ok(Some(EnumerationVariant { name, argument, .. })) - if name == "Cons" && argument.is_none())); + if name.as_printed() == "Cons" && argument.is_none())); assert!(matches!( parse_ev("Cons, }"), Ok(Some(EnumerationVariant { name, argument, .. })) - if name == "Cons" && argument.is_none())); + if name.as_printed() == "Cons" && argument.is_none())); assert!(matches!( parse_ev("Cons(Pair a),"), Ok(Some(EnumerationVariant { name, ref argument, .. })) - if name == "Cons" && + if name.as_printed() == "Cons" && matches!(argument, Some(Type::Application(typef, args)) if matches!(typef.as_ref(), Type::Constructor(_, name) - if name == "Pair") && + if name.as_printed() == "Pair") && matches!(&args.as_slice(), &[Type::Variable(_, argname)] - if argname == "a")))); + if argname.as_printed() == "a")))); assert!(matches!( parse_ev("Cons(Pair a) }"), Ok(Some(EnumerationVariant { name, ref argument, .. 
})) - if name == "Cons" && + if name.as_printed() == "Cons" && matches!(argument, Some(Type::Application(typef, args)) if matches!(typef.as_ref(), Type::Constructor(_, name) - if name == "Pair") && + if name.as_printed() == "Pair") && matches!(&args.as_slice(), &[Type::Variable(_, argname)] - if argname == "a")))); + if argname.as_printed() == "a")))); assert!(matches!( parse_ev("Cons(a b -> c) }"), Ok(Some(EnumerationVariant { name, ref argument, .. })) - if name == "Cons" && + if name.as_printed() == "Cons" && matches!(argument, Some(Type::Function(args, ret)) if matches!(&args.as_slice(), &[Type::Variable(_, a), Type::Variable(_, b)] - if a == "a" && b == "b") && - matches!(ret.as_ref(), Type::Variable(_, c) if c == "c")))); + if a.as_printed() == "a" && b.as_printed() == "b") && + matches!(ret.as_ref(), Type::Variable(_, c) if c.as_printed() == "c")))); } #[test] @@ -393,25 +390,25 @@ fn enumerations() { assert!(matches!( parse_en("enumeration Empty { }"), Ok(EnumerationDef { name, variants, .. }) - if name == "Empty" && variants.is_empty())); + if name.as_printed() == "Empty" && variants.is_empty())); assert!(matches!( parse_en("enumeration Alternates { A, B }"), Ok(EnumerationDef { name, variants, .. }) - if name == "Alternates" && + if name.as_printed() == "Alternates" && matches!(&variants.as_slice(), &[ EnumerationVariant { name: name1, argument: arg1, ..}, EnumerationVariant { name: name2, argument: arg2, ..}, - ] if name1 == "A" && arg1.is_none() && - name2 == "B" && arg2.is_none()))); + ] if name1.as_printed() == "A" && arg1.is_none() && + name2.as_printed() == "B" && arg2.is_none()))); assert!(matches!( parse_en("enumeration Alternates { A, B, }"), Ok(EnumerationDef { name, variants, .. 
}) - if name == "Alternates" && + if name.as_printed() == "Alternates" && matches!(&variants.as_slice(), &[ EnumerationVariant { name: name1, argument: arg1, ..}, EnumerationVariant { name: name2, argument: arg2, ..}, - ] if name1 == "A" && arg1.is_none() && - name2 == "B" && arg2.is_none()))); + ] if name1.as_printed() == "A" && arg1.is_none() && + name2.as_printed() == "B" && arg2.is_none()))); } #[test] diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index a990409..13c94fa 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -89,7 +89,7 @@ impl fmt::Display for Token { pub enum Lexer<'a> { Working(LexerState<'a>), Errored(LexerError), - Done(usize), + Done, } struct LexerState<'a> { @@ -120,7 +120,7 @@ impl<'a> Iterator for Lexer<'a> { fn next(&mut self) -> Option { match self { - Lexer::Done(_) => None, + Lexer::Done => None, Lexer::Errored(e) => Some(Err(e.clone())), Lexer::Working(state) => match state.next_token() { Err(e) => { @@ -130,7 +130,7 @@ impl<'a> Iterator for Lexer<'a> { } Ok(None) => { - *self = Lexer::Done(state.stream.offset()); + *self = Lexer::Done; None } diff --git a/src/syntax/universe.rs b/src/syntax/universe.rs new file mode 100644 index 0000000..0e60a05 --- /dev/null +++ b/src/syntax/universe.rs @@ -0,0 +1,48 @@ +use crate::syntax::ast::*; +use crate::syntax::error::ParserError; +use crate::syntax::parse::Parser; +use crate::syntax::tokens::Lexer; +use memmap2::Mmap; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +pub struct Universe { + pub files: HashMap, + pub modules: HashMap, +} + +impl Default for Universe { + fn default() -> Self { + Universe { + files: HashMap::new(), + modules: HashMap::new(), + } + } +} + +impl Universe { + pub fn add_file>(&mut self, file: P) -> Result<(), ParserError> { + let filename = file.as_ref().to_string_lossy().into_owned(); + + let file_handle = std::fs::File::open(&file).map_err(|e| ParserError::OpenError { + file: filename.clone(), + error: e, + })?; + let 
contents = unsafe { Mmap::map(&file_handle) }.map_err(|e| ParserError::ReadError { + file: filename.clone(), + error: e, + })?; + let string_contents = + std::str::from_utf8(&contents).map_err(|e| ParserError::Utf8Error { + file: filename.clone(), + error: e, + })?; + + let lexer = Lexer::from(string_contents); + let mut parser = Parser::new(&file, lexer); + let module = parser.parse_module()?; + self.modules.insert(file.as_ref().to_path_buf(), module); + + Ok(()) + } +} -- 2.53.0 From 7bd242a64121be833f7282c0b7018003e73815d9 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Thu, 23 Oct 2025 09:26:15 -0700 Subject: [PATCH 15/33] Pattern parsing seems working. --- src/syntax/ast.rs | 28 +++- src/syntax/parse.rs | 282 +++++++++++++++++++++++++++++++++++-- src/syntax/parser_tests.rs | 65 +++++++++ 3 files changed, 360 insertions(+), 15 deletions(-) diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index a25fbae..1d5594c 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -139,7 +139,33 @@ pub struct MatchExpr { } #[derive(Debug)] -pub struct MatchCase {} +pub struct MatchCase { + pub pattern: Pattern, + pub consequent: Expression, +} + +#[derive(Debug)] +pub enum Pattern { + Constant(ConstantValue), + Variable(Name), + EnumerationValue(EnumerationPattern), + Structure(StructurePattern), +} + +#[derive(Debug)] +pub struct EnumerationPattern { + pub location: Location, + pub type_name: Name, + pub variant_name: Name, + pub argument: Option>, +} + +#[derive(Debug)] +pub struct StructurePattern { + pub location: Location, + pub type_name: Name, + pub fields: Vec<(Name, Option)>, +} #[derive(Debug)] pub enum CallKind { diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 662cfe3..f9e1052 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -682,7 +682,256 @@ impl<'lexer> Parser<'lexer> { } fn parse_match_case(&mut self) -> Result, ParserError> { - unimplemented!() + // skip over anything we can just skip + loop { + let peeked = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for match case"))?; + + if matches!(peeked.token, Token::Comma) { + continue; + } + + let stop = matches!(peeked.token, Token::CloseBrace); + + self.save(peeked); + if stop { + return Ok(None); + } + + break; + } + + let pattern = self.parse_pattern()?; + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for an open brace after 'match'"))?; + if !matches!(next.token, Token::Arrow) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "an arrow after a pattern, as part of a match case", + }); + } + + let consequent = self.parse_expression()?; + + Ok(Some(MatchCase { + pattern, + consequent, + })) + } + + pub fn parse_pattern(&mut self) -> Result { + if let Ok(constant) = self.parse_constant() { + return Ok(Pattern::Constant(constant)); + } + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a pattern to match"))?; + + match next.token { + Token::ValueName(x) => { + let name = Name::new(self.to_location(next.span), x); + Ok(Pattern::Variable(name)) + } + + Token::TypeName(x) => { + let type_name = Name::new(self.to_location(next.span.clone()), x); + let start = self.to_location(next.span); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a pattern to match"))?; + match next.token { + Token::OpenBrace => { + let mut fields = vec![]; + + while let Some(field_pattern) = self.parse_field_pattern()? 
{ + fields.push(field_pattern) + } + + let final_brace = self.next()?.ok_or_else(|| { + self.bad_eof("looking for closing brace in structure pattern.") + })?; + if !matches!(final_brace.token, Token::CloseBrace) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: final_brace.span, + token: final_brace.token, + expected: "closing brace in structure pattern", + }); + } + let final_brace_location = self.to_location(final_brace.span); + + let structure_pattern = StructurePattern { + location: start.extend_to(&final_brace_location), + type_name, + fields, + }; + + Ok(Pattern::Structure(structure_pattern)) + } + + Token::Colon => { + let second_colon = self.next()?.ok_or_else(|| { + self.bad_eof("looking for second colon in an enumeration pattern") + })?; + if !matches!(second_colon.token, Token::Colon) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: second_colon.span, + token: second_colon.token, + expected: "second colon in an enumeration pattern", + }); + } + + let vname = self.next()?.ok_or_else(|| { + self.bad_eof("looking for enumeration value name in pattern") + })?; + + let variant_name = match vname.token { + Token::TypeName(s) => { + let loc = self.to_location(vname.span.clone()); + Name::new(loc, s) + } + + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: vname.span, + token: vname.token, + expected: "enumeration value name in pattern", + }); + } + }; + + let mut final_location = self.to_location(vname.span); + + let argument = if let Some(maybe_paren) = self.next()? 
{ + if matches!(maybe_paren.token, Token::OpenParen) { + let sub_pattern = self.parse_pattern()?; + + let tok = self.next()?.ok_or_else(|| { + self.bad_eof("looking for close paren after enum value argument") + })?; + if !matches!(tok.token, Token::CloseParen) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: tok.span, + token: tok.token, + expected: "close paren after enum value argument", + }); + } + + final_location = self.to_location(tok.span); + + Some(Box::new(sub_pattern)) + } else { + None + } + } else { + None + }; + + let location = start.extend_to(&final_location); + + let pattern = EnumerationPattern { + location, + type_name, + variant_name, + argument, + }; + + Ok(Pattern::EnumerationValue(pattern)) + } + + _ => Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "An '::' or '{' after a type name in a pattern", + }), + } + } + + _ => Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "The start of a pattern: a variable name or type name", + }), + } + } + + fn parse_field_pattern(&mut self) -> Result)>, ParserError> { + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for structure pattern field name"))?; + let name = match next.token { + Token::CloseBrace => { + self.save(next); + return Ok(None); + } + + Token::ValueName(s) => Name::new(self.to_location(next.span), s), + + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "a field name in a structure pattern", + }); + } + }; + + let next = self.next()?.ok_or_else(|| { + self.bad_eof("looking for colon, comma, or brace after structure field name in pattern") + })?; + let sub_pattern = match next.token { + Token::Comma => None, + + Token::CloseBrace => { + self.save(next); + None + } + + Token::Colon => { + let subpattern = self.parse_pattern()?; + let next = self + .next()? + .ok_or_else(|| self.bad_eof( + "looking for comma or close brace after structure field"))?; + + match next.token { + Token::Comma => {} + Token::CloseBrace => self.save(next), + _ => return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "comma or close brace after structure field" + }), + } + + Some(subpattern) + } + + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "colon, comma, or brace after structure field name in pattern", + }); + } + }; + + Ok(Some((name, sub_pattern))) } fn parse_if_expression(&mut self) -> Result { @@ -1120,21 +1369,26 @@ impl<'lexer> Parser<'lexer> { }; let arg = if let Some(maybe_paren) = self.next()? 
{ - let expr = self.parse_expression()?; + if matches!(maybe_paren.token, Token::OpenParen) { + let expr = self.parse_expression()?; - let tok = self.next()?.ok_or_else(|| { - self.bad_eof("looking for close paren after enum value argument") - })?; - if !matches!(tok.token, Token::CloseParen) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: tok.span, - token: tok.token, - expected: "close paren after enum value argument", - }); + let tok = self.next()?.ok_or_else(|| { + self.bad_eof("looking for close paren after enum value argument") + })?; + if !matches!(tok.token, Token::CloseParen) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: tok.span, + token: tok.token, + expected: "close paren after enum value argument", + }); + } + + Some(Box::new(expr)) + } else { + self.save(maybe_paren); + None } - - Some(Box::new(expr)) } else { None }; diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index aec9810..ef0b042 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -925,3 +925,68 @@ fn conditionals() { Ok(Expression::Conditional(cond)) if matches!(cond.test.as_ref(), Expression::Call(_, CallKind::Infix, _)))); } + +#[test] +#[allow(clippy::get_first)] +fn patterns() { + let parse_pat = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_pattern() + }; + + assert!(matches!( + parse_pat("1"), + Ok(Pattern::Constant(ConstantValue::Integer(_, + IntegerWithBase { value, .. }))) if + value == 1)); + assert!(matches!( + parse_pat("x"), + Ok(Pattern::Variable(n)) if n.as_printed() == "x")); + assert!(matches!( + parse_pat("Cons::Pair(pair)"), + Ok(Pattern::EnumerationValue(EnumerationPattern{ + type_name, variant_name, argument: Some(subpat), .. 
+ })) if + type_name.as_printed() == "Cons" && + variant_name.as_printed() == "Pair" && + matches!(subpat.as_ref(), Pattern::Variable(p) if + p.as_printed() == "pair"))); + assert!(matches!( + parse_pat("Structure{ field, other: something }"), + Ok(Pattern::Structure(StructurePattern { type_name, fields, .. })) if + type_name.as_printed() == "Structure" && + fields.len() == 2 && + matches!(fields.get(0), Some((n, None)) if n.as_printed() == "field") && + matches!(fields.get(1), Some((n, Some(Pattern::Variable(s)))) if + n.as_printed() == "other" && + s.as_printed() == "something"))); + assert!(matches!( + parse_pat("Enumeration::Value(Structure { field, })"), + Ok(Pattern::EnumerationValue(EnumerationPattern { + type_name, variant_name, argument: Some(subpat), .. + })) if + type_name.as_printed() == "Enumeration" && + variant_name.as_printed() == "Value" && + matches!(subpat.as_ref(), Pattern::Structure(StructurePattern { + type_name, fields, .. + }) if + type_name.as_printed() == "Structure" && + fields.len() == 1 && + matches!(fields.first(), Some((f, None)) if + f.as_printed() == "field")))); + assert!(matches!( + parse_pat("Structure { field: Enumeration::Value, }"), + Ok(Pattern::Structure(StructurePattern { + type_name, fields, .. + })) if + type_name.as_printed() == "Structure" && + fields.len() == 1 && + matches!(fields.first(), Some((f, Some(subpat))) if + f.as_printed() == "field" && + matches!(subpat, Pattern::EnumerationValue(EnumerationPattern { + type_name, variant_name, argument: None, .. + }) if + type_name.as_printed() == "Enumeration" && + variant_name.as_printed() == "Value")))); +} -- 2.53.0 From 05d7284551132bb8f2abd102ea6c107fef56d579 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 5 Nov 2025 21:30:03 -0800 Subject: [PATCH 16/33] Tidy, tidy, tidy. 
--- src/syntax.rs | 1 + src/syntax/ast.rs | 55 ++- src/syntax/error.rs | 4 +- src/syntax/name.rs | 4 + src/syntax/parse.rs | 901 +++++++++++++++++++++---------------- src/syntax/parser_tests.rs | 242 +++++----- src/syntax/tokens.rs | 39 +- src/syntax/universe.rs | 14 +- 8 files changed, 723 insertions(+), 537 deletions(-) diff --git a/src/syntax.rs b/src/syntax.rs index d15f8f9..08451b8 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -12,3 +12,4 @@ pub use crate::syntax::error::ParserError; pub use ast::*; pub use location::{Located, Location}; pub use name::Name; +pub use universe::*; diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 1d5594c..3d88c33 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -27,6 +27,7 @@ pub enum Def { Structure(StructureDef), Function(FunctionDef), Value(ValueDef), + Operator(OperatorDef), } impl Located for Def { @@ -36,6 +37,7 @@ impl Located for Def { Def::Structure(def) => def.location.clone(), Def::Function(def) => def.location.clone(), Def::Value(def) => def.location.clone(), + Def::Operator(def) => def.location.clone(), } } } @@ -91,6 +93,13 @@ pub struct ValueDef { pub value: Expression, } +#[derive(Debug)] +pub struct OperatorDef { + pub operator_name: Name, + pub function_name: Name, + location: Location, +} + #[derive(Debug)] pub enum ExportClass { Public, @@ -114,15 +123,45 @@ pub struct BindingStmt { #[derive(Debug)] pub enum Expression { Value(ConstantValue), - Reference(Name), - EnumerationValue(Name, Name, Option>), - StructureValue(Name, Vec), + Reference(Location, Name), + Enumeration(EnumerationExpr), + Structure(StructureExpr), Conditional(ConditionalExpr), Match(MatchExpr), Call(Box, CallKind, Vec), Block(Location, Vec), } +impl Located for Expression { + fn location(&self) -> Location { + match self { + Expression::Value(c) => c.location(), + Expression::Reference(l, _) => l.clone(), + Expression::Enumeration(ev) => ev.location.clone(), + Expression::Structure(sv) => sv.location.clone(), + 
Expression::Conditional(ce) => ce.location.clone(), + Expression::Match(me) => me.location.clone(), + Expression::Call(_, _, _) => unimplemented!(), + Expression::Block(l, _) => l.clone(), + } + } +} + +#[derive(Debug)] +pub struct EnumerationExpr { + pub location: Location, + pub type_name: Name, + pub variant_name: Name, + pub argument: Option>, +} + +#[derive(Debug)] +pub struct StructureExpr { + pub location: Location, + pub type_name: Name, + pub fields: Vec, +} + #[derive(Debug)] pub struct ConditionalExpr { pub location: Location, @@ -240,6 +279,16 @@ pub enum ConstantValue { String(Location, String), } +impl Located for ConstantValue { + fn location(&self) -> Location { + match self { + ConstantValue::Integer(l, _) => l.clone(), + ConstantValue::Character(l, _) => l.clone(), + ConstantValue::String(l, _) => l.clone(), + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] pub struct IntegerWithBase { #[proptest(strategy = "proptest::prop_oneof![ \ diff --git a/src/syntax/error.rs b/src/syntax/error.rs index 33e3b42..d71af16 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -16,7 +16,7 @@ pub enum ParserError { #[error("Unacceptable end of file at {file} while {place}")] UnacceptableEof { file: ArcIntern, - place: &'static str, + place: String, }, #[error("Unexpected token at {file}: expected {expected}, saw {token}")] @@ -24,7 +24,7 @@ pub enum ParserError { file: ArcIntern, span: Range, token: Token, - expected: &'static str, + expected: String, }, #[error("Unexpected problem opening file {file}: {error}")] diff --git a/src/syntax/name.rs b/src/syntax/name.rs index f8069ad..7b48d51 100644 --- a/src/syntax/name.rs +++ b/src/syntax/name.rs @@ -61,4 +61,8 @@ impl Name { pub fn bind_to(&mut self, other: &Name) { self.identifier = other.identifier; } + + pub fn location(&self) -> Option<&Location> { + self.location.as_ref() + } } diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index f9e1052..d0c3df1 100644 --- a/src/syntax/parse.rs 
+++ b/src/syntax/parse.rs @@ -6,6 +6,15 @@ use std::collections::HashMap; use std::ops::Range; use std::path::{Path, PathBuf}; +/// A parser for a particular file. +/// +/// This parser should be used for exactly one file, and its lifetime +/// must be tied to the underlying lexer. However, after the parser is +/// done, the resultant object should have no lifetime links to the +/// original file, so it can be thrown away. +/// +/// The parser includes information about operator precedence that is +/// stateful. pub struct Parser<'lexer> { file: ArcIntern, lexer: Lexer<'lexer>, @@ -15,6 +24,12 @@ pub struct Parser<'lexer> { postfix_precedence_table: HashMap, } +/// The directional associativity for an operator. +/// +/// This directionality impacts whether (a + b + c) defaults to +/// ((a + b) + c) or (a + (b + c)). It does not effect situations +/// in which operator numeric precedence is different between +/// operators. pub enum Associativity { Left, Right, @@ -41,6 +56,8 @@ impl<'lexer> Parser<'lexer> { /// Add the given operator to our precedence table, at the given /// precedence level and associativity. + /// + /// This is used for infix operators, only. pub fn add_infix_precedence( &mut self, operator: S, @@ -57,16 +74,28 @@ impl<'lexer> Parser<'lexer> { .insert(operator.to_string(), actual_associativity); } + /// Add the given operator to our precedence table, at the given + /// precedence level and associativity. + /// + /// This is used for prefix operators, only. pub fn add_prefix_precedence(&mut self, operator: S, level: u8) { self.prefix_precedence_table .insert(operator.to_string(), level * 2); } + /// Add the given operator to our precedence table, at the given + /// precedence level and associativity. + /// + /// This is used for postfix operators, only. pub fn add_postfix_precedence(&mut self, operator: S, level: u8) { self.postfix_precedence_table .insert(operator.to_string(), level * 2); } + /// Get the precedence of the given operator. 
+ /// + /// FIXME: This currently only functions on infix operators, not + /// prefix and postfix. In general, this can all be cleaned up. fn get_precedence(&self, name: &String) -> (u8, u8) { match self.infix_precedence_table.get(name) { None => (19, 20), @@ -74,7 +103,12 @@ impl<'lexer> Parser<'lexer> { } } - /// Get the next token. + /// Get the next token from the input stream, or None if we're at + /// the end of a stream. + /// + /// Ok(None) represents "we have reached the end of the stream", while + /// an Err(_) means that we ran into some sort of error (UTF-8 formatting, + /// lexing, IO, etc.) in reading the stream. pub fn next(&mut self) -> Result, ParserError> { let result = self.known_tokens.pop(); @@ -91,21 +125,98 @@ impl<'lexer> Parser<'lexer> { } } + /// Save the given token back to the top of the stream. + /// + /// This is essentially an "undo" on next(), or an alternative path for + /// peeking at the next token in the stream. fn save(&mut self, token: LocatedToken) { self.known_tokens.push(token) } - fn bad_eof(&mut self, place: &'static str) -> ParserError { - ParserError::UnacceptableEof { - file: self.file.clone(), - place, + /// Get the location of the next token in the stream. + /// + /// This will return an error if we're at the end of the file. + fn current_location(&mut self) -> Result { + let current = self.next()?; + match current { + None => Err(self.bad_eof("trying to get current location")), + Some(token) => { + let retval = self.to_location(token.span.clone()); + self.save(token); + Ok(retval) + } } } + /// Generate the parser error that should happen when we hit an EOF + /// in a bad place. + fn bad_eof(&mut self, place: S) -> ParserError { + ParserError::UnacceptableEof { + file: self.file.clone(), + place: place.to_string(), + } + } + + /// Convert an offset into a formal location that can be saved off + /// into ASTs. 
fn to_location(&self, span: Range) -> Location { Location::new(&self.file, span) } + /// See if the next token is the keyword, as expected. + /// + /// If it isn't, this routine will provide an error, but it will make + /// sure to put the token back into the stream. + fn require_keyword(&mut self, keyword: &'static str) -> Result { + match self.next()? { + None => Err(self.bad_eof(format!("looking for keyword '{keyword}'"))), + Some(ltoken) => match ltoken.token { + Token::ValueName(s) if s.as_str() == keyword => Ok(self.to_location(ltoken.span)), + _ => { + self.save(ltoken.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: ltoken.span, + token: ltoken.token, + expected: format!("keyword {keyword}"), + }) + } + }, + } + } + + /// See if the next token is the given one, as expected. + /// + /// If it isn't, this routine will provide an error, but it will make + /// sure to put the token back into the stream. + fn require_token( + &mut self, + token: Token, + place: &'static str, + ) -> Result { + let message = || format!("looking for '{token}' in {place}"); + let next = self.next()?.ok_or_else(|| self.bad_eof(message()))?; + + if next.token != token { + self.save(next.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: message(), + }) + } else { + Ok(self.to_location(next.span)) + } + } + + /// Parse the top level file associated with a Bang module. + /// + /// This will expect to read until EOF, and will fail or stall + /// forever if there is no EOF, or the EOF ends in the wrong + /// place. So this should *not* be used for interactive sessions, + /// because those are unlikely to have EOFs in the appropriate + /// places. pub fn parse_module(&mut self) -> Result { let mut definitions = vec![]; @@ -120,11 +231,16 @@ impl<'lexer> Parser<'lexer> { } } + /// Parse a definition in a file (structure, enumeration, value, etc.). 
+ /// + /// This will read a definition. If there's an error, it's very likely the + /// input stream will be corrupted, so you probably don't want to try to + /// recover. You can, obviously. pub fn parse_definition(&mut self) -> Result { let (export, start) = self.parse_export_class()?; let type_restrictions = self.parse_type_restrictions()?; let definition = self.parse_def()?; - let location = definition.location().merge_span(start); + let location = definition.location().extend_to(&start); Ok(Definition { location, @@ -134,42 +250,32 @@ impl<'lexer> Parser<'lexer> { }) } - fn parse_export_class(&mut self) -> Result<(ExportClass, Range), ParserError> { - let maybe_export = self - .next()? - .ok_or_else(|| self.bad_eof("looking for possible export"))?; - - if matches!(maybe_export.token, Token::ValueName(ref x) if x == "export") { - Ok((ExportClass::Public, maybe_export.span)) + /// Parse the export class for the current definition. + /// + /// If there isn't an 'export' declaration, then this will return 'private', + /// because if it hasn't been declared exported then it's private. But this + /// does mean that a future parsing error will be assumed to be a private + /// declaration. + fn parse_export_class(&mut self) -> Result<(ExportClass, Location), ParserError> { + if let Ok(span) = self.require_keyword("export") { + Ok((ExportClass::Public, span)) } else { - let start = maybe_export.span.clone(); - self.save(maybe_export); + let start = self.current_location()?; Ok((ExportClass::Private, start)) } } + /// Parse a type restriction and return it. + /// + /// Like the export class parsing, parsing type restrictions has a clear + /// default (no restrictions) when the input doesn't lead with the appropriate + /// keyword. As a result, this can generate a result even in cases in which + /// the input is empty. pub fn parse_type_restrictions(&mut self) -> Result { - let Some(maybe_restrict) = self.next()? 
else { - return Ok(TypeRestrictions::empty()); - }; - - if !matches!(maybe_restrict.token, Token::ValueName(ref x) if x == "restrict") { - self.save(maybe_restrict); + if self.require_keyword("restrict").is_err() { return Ok(TypeRestrictions::empty()); } - - let maybe_paren = self - .next()? - .ok_or_else(|| self.bad_eof("Looking for open paren after restrict"))?; - - if !matches!(maybe_paren.token, Token::OpenParen) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: maybe_paren.span, - token: maybe_paren.token, - expected: "open parenthesis, following the restrict keyword", - }); - } + let _ = self.require_token(Token::OpenParen, "type restriction")?; let mut restrictions = vec![]; @@ -177,21 +283,19 @@ impl<'lexer> Parser<'lexer> { restrictions.push(type_restriction); } - let maybe_paren = self - .next()? - .ok_or_else(|| self.bad_eof("Looking for open paren after restrict"))?; - if !matches!(maybe_paren.token, Token::CloseParen) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: maybe_paren.span, - token: maybe_paren.token, - expected: "close parenthesis following type restrictions", - }); - } - + let _ = self.require_token(Token::CloseParen, "type restriction")?; Ok(TypeRestrictions { restrictions }) } + /// Parse a single type retriction. + /// + /// A type restriction should consist of a constructor token followed by + /// some number of arguments. We parse this in the obvious way, stopping + /// the input when we hit something that isn't a base type. + /// + /// Note that, because of this, we might end up in a situation in which + /// we throw an error after consuming a bunch of input, meaning that it + /// will be impossible to recover. fn parse_type_restriction(&mut self) -> Result, ParserError> { let maybe_constructor = self .next()? 
@@ -220,7 +324,8 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: maybe_constructor.span, token: weird, - expected: "Constructor name, comma, or close parenthesis in type restriction", + expected: "Constructor name, comma, or close parenthesis in type restriction" + .into(), }); } }; @@ -248,6 +353,13 @@ impl<'lexer> Parser<'lexer> { Ok(Some(restriction)) } + /// Parse a definition. + /// + /// A definition can include a structure definition, the definition of an enumeration, + /// the declaration of some sort of operator, or a value definition. (This statement + /// assumes that you consider a function a value, which is reasonable.) + /// + /// If this returns an error, you should not presume that you can recover from it. fn parse_def(&mut self) -> Result { let next = self .next()? @@ -261,6 +373,10 @@ impl<'lexer> Parser<'lexer> { return Ok(Def::Enumeration(enumeration)); } + if let Ok(operator) = self.parse_operator() { + return Ok(Def::Operator(operator)); + } + if let Ok(fun_or_val) = self.parse_function_or_value() { return Ok(fun_or_val); } @@ -269,71 +385,31 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "'structure', 'enumeration', or a value identifier", + expected: "'structure', 'enumeration', or a value identifier".into(), }) } + /// Parse a structure definition. + /// + /// Structure definitions should start with the keyword "structure". If they + /// don't, this will return, but it will do so in a way that is recoverable. + /// Otherwise, we'll start eating tokens and who knows what state we'll end + /// in. pub fn parse_structure(&mut self) -> Result { - let structure_token = self - .next()? 
- .ok_or_else(|| self.bad_eof("looking for definition"))?; - if !matches!(structure_token.token, Token::ValueName(ref s) if s == "structure") { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: structure_token.span, - token: structure_token.token, - expected: "the 'structure' keyword", - }); - } + let start_location = self.require_keyword("structure")?; - let name = self - .next()? - .ok_or_else(|| self.bad_eof("looking for structure name"))?; - let structure_name = match name.token { - Token::TypeName(str) => Name::new(self.to_location(name.span), str), - _ => { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: name.span, - token: name.token, - expected: "a structure name", - }); - } - }; - - let brace = self - .next()? - .ok_or_else(|| self.bad_eof("the open brace after a structure name"))?; - if !matches!(brace.token, Token::OpenBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: brace.span, - token: brace.token, - expected: "the brace after a structure name", - }); - } + let structure_name = self.parse_type_name("structure definition")?; + self.require_token(Token::OpenBrace, "after a structure name")?; let mut fields = vec![]; - while let Some(field_definition) = self.parse_field_definition()? 
{ fields.push(field_definition); } - let brace = self.next()?.ok_or_else(|| { - self.bad_eof("the close brace after at the end of a structure definition") - })?; - if !matches!(brace.token, Token::CloseBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: brace.span, - token: brace.token, - expected: "the brace at the end of a structure definition", - }); - } + let brace = + self.require_token(Token::CloseBrace, "at the end of a structure definition")?; - let location = self - .to_location(structure_token.span) - .extend_to(&self.to_location(brace.span)); + let location = start_location.extend_to(&brace); Ok(StructureDef { name: structure_name, @@ -342,66 +418,38 @@ impl<'lexer> Parser<'lexer> { }) } + /// Parse a name and field value for a field inside a structure constructor. + /// + /// In this case, what we mean is the full "foo: bar" syntax that goes inside a structure + /// expression to declare a value. pub fn parse_field_value(&mut self) -> Result, ParserError> { - let maybe_name = self - .next()? 
- .ok_or_else(|| self.bad_eof("parsing field definition"))?; - - let field = match maybe_name.token { - Token::ValueName(x) => Name::new(self.to_location(maybe_name.span), x), - _ => { - self.save(maybe_name.clone()); - return Ok(None); - } + let Ok(field) = self.parse_name("structure value") else { + return Ok(None); }; - - let maybe_colon = self.next()?.ok_or_else(|| { - self.bad_eof("looking for colon, comma, or close brace after field name") - })?; - if !matches!(maybe_colon.token, Token::Colon) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: maybe_colon.span, - token: maybe_colon.token, - expected: "colon after field name in constructor", - }); - } - + self.require_token(Token::Colon, "after a field name")?; let value = self.parse_expression()?; - let end_token = self.next()?.ok_or_else(|| { - self.bad_eof("looking for comma or close brace after field definition") - })?; - if !matches!(end_token.token, Token::Comma) { + if let Some(end_token) = self.next()? + && !matches!(end_token.token, Token::Comma) + { self.save(end_token); } Ok(Some(FieldValue { field, value })) } + /// Parse a name and field definition for a field inside a structure definition. + /// + /// In this case, what we mean is the full "foo: Bar" syntax that goes inside a + /// structure type definition. Note, though, that we allow the ": Bar" to be + /// elided in the case that the user wants to try to infer the type. In addition, + /// recall that structure types can declare their individual fields public or + /// not, so that information gets parsed as well. pub fn parse_field_definition(&mut self) -> Result, ParserError> { - let (export, start) = self.parse_export_class()?; - let maybe_name = self - .next()? 
- .ok_or_else(|| self.bad_eof("parsing field definition"))?; - - let name = match maybe_name.token { - Token::ValueName(x) => Name::new(self.to_location(maybe_name.span.clone()), x), - _ => { - self.save(maybe_name.clone()); - if matches!(export, ExportClass::Private) { - return Ok(None); - } else { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: maybe_name.span, - token: maybe_name.token, - expected: "a field name", - }); - } - } + let (export, start_location) = self.parse_export_class()?; + let Ok(name) = self.parse_name("field definition") else { + return Ok(None); }; - let start_location = self.to_location(start); let maybe_colon = self.next()?.ok_or_else(|| { self.bad_eof("looking for colon, comma, or close brace after field name") @@ -420,7 +468,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: maybe_colon.span, token: maybe_colon.token, - expected: "colon, comma, or close brace after field name", + expected: "colon, comma, or close brace after field name".into(), }); } }; @@ -440,14 +488,14 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: end_token.span, token: end_token.token, - expected: "looking for comma or close brace after field definition", + expected: "looking for comma or close brace after field definition".into(), }); } }; let end_location = maybe_end_location .or_else(|| field_type.as_ref().map(|x| x.location())) - .unwrap_or_else(|| self.to_location(maybe_name.span)); + .unwrap_or_else(|| name.location().unwrap().clone()); let location = start_location.extend_to(&end_location); Ok(Some(StructureField { @@ -458,67 +506,24 @@ impl<'lexer> Parser<'lexer> { })) } + /// Parse an enumeration declaration from the input stream. + /// + /// As with structures, this will cleanly abort if the first token is wrong, + /// but if it makes it past that token, all bets are off. pub fn parse_enumeration(&mut self) -> Result { - let enumeration_token = self - .next()? 
- .ok_or_else(|| self.bad_eof("looking for definition"))?; - if !matches!(enumeration_token.token, Token::ValueName(ref e) if e == "enumeration") { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: enumeration_token.span, - token: enumeration_token.token, - expected: "the 'enumeration' keyword", - }); - } + let start_location = self.require_keyword("enumeration")?; + let enumeration_name = self.parse_type_name("enumeration definition")?; - let name = self - .next()? - .ok_or_else(|| self.bad_eof("looking for enumeration name"))?; - let enumeration_name = match name.token { - Token::TypeName(str) => Name::new(self.to_location(name.span), str), - _ => { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: name.span, - token: name.token, - expected: "an enumeration name", - }); - } - }; - - let brace = self - .next()? - .ok_or_else(|| self.bad_eof("the open brace after an enumeration name"))?; - if !matches!(brace.token, Token::OpenBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: brace.span, - token: brace.token, - expected: "the brace after an enumeration name", - }); - } + self.require_token(Token::OpenBrace, "after enumeration name")?; let mut variants = vec![]; - while let Some(variant_definition) = self.parse_enum_variant()? 
{ variants.push(variant_definition); } - let brace = self.next()?.ok_or_else(|| { - self.bad_eof("the close brace after at the end of an enumeration definition") - })?; - if !matches!(brace.token, Token::CloseBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: brace.span, - token: brace.token, - expected: "the brace at the end of an enumeration definition", - }); - } + let brace = self.require_token(Token::CloseBrace, "after enumeration options")?; - let location = self - .to_location(enumeration_token.span) - .extend_to(&self.to_location(brace.span)); + let location = start_location.extend_to(&brace); Ok(EnumerationDef { name: enumeration_name, @@ -527,46 +532,22 @@ impl<'lexer> Parser<'lexer> { }) } + /// Parse a variant of an enumeration in the enumeration definition. + /// + /// At this point in bang's lifecycle, enumerations can have zero or one arguments, + /// but no more, which simplified parsing a trace. pub fn parse_enum_variant(&mut self) -> Result, ParserError> { - let maybe_name = self - .next()? - .ok_or_else(|| self.bad_eof("looking for enumeration name"))?; - let name = match maybe_name.token { - Token::TypeName(x) => Name::new(self.to_location(maybe_name.span.clone()), x), - Token::CloseBrace => { - self.save(maybe_name); - return Ok(None); - } - _ => { - self.save(maybe_name.clone()); - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: maybe_name.span, - token: maybe_name.token, - expected: "variant name (identifier starting with a capital)", - }); - } + let Ok(name) = self.parse_type_name("variant definition") else { + return Ok(None); }; - let start_location = self.to_location(maybe_name.span); + let start_location = name.location().unwrap().clone(); let maybe_paren = self .next()? 
.ok_or_else(|| self.bad_eof("trying to understand enumeration variant"))?; let (argument, arg_location) = if matches!(maybe_paren.token, Token::OpenParen) { let t = self.parse_type()?; - - let maybe_close = self - .next()? - .ok_or_else(|| self.bad_eof("trying to parse a enumeration variant's type"))?; - if !matches!(maybe_close.token, Token::CloseParen) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: maybe_close.span, - token: maybe_close.token, - expected: "close paren to end an enumeration variant's type argument", - }); - } - + self.require_token(Token::CloseParen, "variant's type argument")?; let location = t.location(); (Some(t), location) } else { @@ -589,7 +570,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: ender.span, token: ender.token, - expected: "comma or close brace after enumeration variant", + expected: "comma or close brace after enumeration variant".into(), }); } }; @@ -603,10 +584,90 @@ impl<'lexer> Parser<'lexer> { })) } - fn parse_function_or_value(&mut self) -> Result { + /// Parse an operator declaration. + /// + /// Operator declarations are the only thing where we immediately modify the state + /// of the parser, allowing the operator to be used immediately after it is declared. + /// Note that by "declare", we mean that the operator is given a variable that it maps + /// to; that variable can be declared further on in the file or even in another module, + /// as we won't try to resolve it until later. + /// + /// Like most definitions, we'll abort cleanly if the first token isn't the "operator" + /// keyword, but all bets are off after that. + pub fn parse_operator(&mut self) -> Result { + let _operator = self.require_keyword("operator")?; + unimplemented!() } + /// Parse a function or a value. + /// + /// Technically speaking, functions are values, so the name can feel a little silly. 
+ /// However, we have some nice syntax for functions that avoids the need to put lambdas + /// everywhere, and so we sort of treat them differently. + fn parse_function_or_value(&mut self) -> Result { + let name = self.parse_name("function or value definition")?; + let start = name.location().unwrap().clone(); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("type or value for definition"))?; + + match next.token { + // If we see an open parenthesis next, we're looking at a nicely-formatted + // function definition, such as: + // + // factorial(x: Int) : Int { + // match x { + // 1 => 1, + // x => x * fact(x - 1), + // } + // } + // + // Or any of many variations of that. + Token::OpenParen => { + unimplemented!() + } + + // If we see a colon, then someone's giving us a type for what is probably + // some form of simple constant, such as: + // + // foo : Int = 4 + // + // But honestly, there's a lot of odd possibilities of complicated things + // they could write there. + Token::Colon => { + unimplemented!() + } + + // If we see an equal sign, we're jumping right to the value part of the + // definition, and we're doing something like this: + // + // foo = 4 + // + // Again, though, you could write all sorts of interesting things after + // that. + Token::OperatorName(eq) if eq == "=" => { + let value = self.parse_expression()?; + + Ok(Def::Value(ValueDef { + name, + location: start.extend_to(&value.location()), + value, + })) + } + + // Those should be the only cases, so if we get here, something weird + // is going on. + _ => Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "open parenthesis, colon, or equals after variable in definition".into(), + }), + } + } + pub fn parse_expression(&mut self) -> Result { let next = self .next()? 
@@ -634,7 +695,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "an 'match' to start a pattern match", + expected: "an 'match' to start a pattern match".into(), }); } let start = self.to_location(next.span); @@ -649,7 +710,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "an open brace after the match expression", + expected: "an open brace after the match expression".into(), }); } @@ -667,7 +728,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "a close brace to end a match expression", + expected: "a close brace to end a match expression".into(), }); } let end = self.to_location(next.span); @@ -712,7 +773,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "an arrow after a pattern, as part of a match case", + expected: "an arrow after a pattern, as part of a match case".into(), }); } @@ -762,7 +823,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: final_brace.span, token: final_brace.token, - expected: "closing brace in structure pattern", + expected: "closing brace in structure pattern".into(), }); } let final_brace_location = self.to_location(final_brace.span); @@ -776,19 +837,7 @@ impl<'lexer> Parser<'lexer> { Ok(Pattern::Structure(structure_pattern)) } - Token::Colon => { - let second_colon = self.next()?.ok_or_else(|| { - self.bad_eof("looking for second colon in an enumeration pattern") - })?; - if !matches!(second_colon.token, Token::Colon) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: second_colon.span, - token: second_colon.token, - expected: "second colon in an enumeration pattern", - }); - } - + Token::DoubleColon => { let vname = self.next()?.ok_or_else(|| { self.bad_eof("looking for enumeration value name in pattern") })?; @@ -804,7 +853,7 @@ impl<'lexer> Parser<'lexer> { 
file: self.file.clone(), span: vname.span, token: vname.token, - expected: "enumeration value name in pattern", + expected: "enumeration value name in pattern".into(), }); } }; @@ -816,14 +865,16 @@ impl<'lexer> Parser<'lexer> { let sub_pattern = self.parse_pattern()?; let tok = self.next()?.ok_or_else(|| { - self.bad_eof("looking for close paren after enum value argument") + self.bad_eof( + "looking for close paren after enum value argument", + ) })?; if !matches!(tok.token, Token::CloseParen) { return Err(ParserError::UnexpectedToken { file: self.file.clone(), span: tok.span, token: tok.token, - expected: "close paren after enum value argument", + expected: "close paren after enum value argument".into(), }); } @@ -853,7 +904,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "An '::' or '{' after a type name in a pattern", + expected: "An '::' or '{' after a type name in a pattern".into(), }), } } @@ -862,7 +913,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "The start of a pattern: a variable name or type name", + expected: "The start of a pattern: a variable name or type name".into(), }), } } @@ -884,7 +935,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "a field name in a structure pattern", + expected: "a field name in a structure pattern".into(), }); } }; @@ -902,20 +953,21 @@ impl<'lexer> Parser<'lexer> { Token::Colon => { let subpattern = self.parse_pattern()?; - let next = self - .next()? 
- .ok_or_else(|| self.bad_eof( - "looking for comma or close brace after structure field"))?; + let next = self.next()?.ok_or_else(|| { + self.bad_eof("looking for comma or close brace after structure field") + })?; match next.token { Token::Comma => {} Token::CloseBrace => self.save(next), - _ => return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "comma or close brace after structure field" - }), + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "comma or close brace after structure field".into(), + }); + } } Some(subpattern) @@ -926,7 +978,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "colon, comma, or brace after structure field name in pattern", + expected: "colon, comma, or brace after structure field name in pattern".into(), }); } }; @@ -943,7 +995,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "an 'if' to start a conditional", + expected: "an 'if' to start a conditional".into(), }); } let start = self.to_location(next.span); @@ -993,7 +1045,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "an open brace to start a block", + expected: "an open brace to start a block".into(), }); } let start = self.to_location(next.span); @@ -1017,14 +1069,14 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "a close brace to end a block", + expected: "a close brace to end a block".into(), }); } let end = self.to_location(next.span); if !ended_with_expr { let void_name = Name::new(end.clone(), "%prim%void"); - let void_ref = Expression::Reference(void_name); + let void_ref = Expression::Reference(end.clone(), void_name); let void_call = Expression::Call(Box::new(void_ref), CallKind::Normal, 
vec![]); statements.push(Statement::Expression(void_call)); } @@ -1033,36 +1085,34 @@ impl<'lexer> Parser<'lexer> { } pub fn parse_statement(&mut self) -> Result, ParserError> { - loop { - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for a statement or close brace"))?; + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a statement or close brace"))?; - match next.token { - Token::CloseBrace => { + match next.token { + Token::CloseBrace => { + self.save(next); + Ok(None) + } + + Token::ValueName(ref l) if l == "let" => { + self.save(next); + Ok(Some((Statement::Binding(self.parse_let()?), false))) + } + + _ => { + self.save(next); + let expr = Statement::Expression(self.parse_expression()?); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for semicolon or close brace"))?; + + if matches!(next.token, Token::Semi) { + Ok(Some((expr, false))) + } else { self.save(next); - return Ok(None); - } - - Token::ValueName(ref l) if l == "let" => { - self.save(next); - return Ok(Some((Statement::Binding(self.parse_let()?), false))); - } - - _ => { - self.save(next); - let expr = Statement::Expression(self.parse_expression()?); - - let next = self - .next()? 
- .ok_or_else(|| self.bad_eof("looking for semicolon or close brace"))?; - - if matches!(next.token, Token::Semi) { - return Ok(Some((expr, false))); - } else { - self.save(next); - return Ok(Some((expr, true))); - } + Ok(Some((expr, true))) } } } @@ -1078,7 +1128,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "a 'let' to open a binding statement", + expected: "a 'let' to open a binding statement".into(), }); } let start = self.to_location(next.span); @@ -1101,7 +1151,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "a variable name for the let binding", + expected: "a variable name for the let binding".into(), }); } }; @@ -1114,7 +1164,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "an '=' after the variable name in a let binding", + expected: "an '=' after the variable name in a let binding".into(), }); } @@ -1128,7 +1178,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "a semicolon to finish a let statement", + expected: "a semicolon to finish a let statement".into(), }); } let end = self.to_location(next.span); @@ -1155,13 +1205,14 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "a base expression of a tighter-binding prefix operator", + expected: "a base expression of a tighter-binding prefix operator".into(), }); } let rhs = self.parse_arithmetic(*pre_prec)?; - let opname = Name::new(self.to_location(next.span), n); - let op_expr = Expression::Reference(opname); + let location = self.to_location(next.span); + let opname = Name::new(location.clone(), n); + let op_expr = Expression::Reference(location, opname); Expression::Call(Box::new(op_expr), CallKind::Prefix, vec![rhs]) } else { @@ -1192,14 +1243,15 @@ impl<'lexer> Parser<'lexer> { break; } - let opname = 
Name::new(self.to_location(next.span), n); - let op_expr = Expression::Reference(opname); + let location = self.to_location(next.span); + let opname = Name::new(location.clone(), n); + let op_expr = Expression::Reference(location, opname); lhs = Expression::Call(Box::new(op_expr), CallKind::Postfix, vec![lhs]); continue; } - let (left_pr, right_pr) = self.get_precedence(&n); + let (left_pr, right_pr) = self.get_precedence(n); if left_pr < level { self.save(next); @@ -1207,8 +1259,9 @@ impl<'lexer> Parser<'lexer> { } let rhs = self.parse_arithmetic(right_pr)?; - let name = Name::new(self.to_location(next.span), n); - let opref = Box::new(Expression::Reference(name)); + let location = self.to_location(next.span); + let name = Name::new(location.clone(), n); + let opref = Box::new(Expression::Reference(location, name)); let args = vec![lhs, rhs]; lhs = Expression::Call(opref, CallKind::Infix, args); @@ -1234,7 +1287,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "open paren for call arguments", + expected: "open paren for call arguments".into(), }); } @@ -1264,7 +1317,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "comma or close paren in function arguments", + expected: "comma or close paren in function arguments".into(), }); } } @@ -1273,6 +1326,23 @@ impl<'lexer> Parser<'lexer> { Ok(args) } + /// Parse a base expression. + /// + /// A base expression can be any number of things: + /// * A constant, of any form + /// * A variable name + /// * A constructor, like a structure constructor or an enumeration value + /// * A parenthesized expression of some other form + /// * A block + /// + /// Most of these can be identified by the first token in the input + /// stream. If we don't recognize a valid first token in the input + /// stream, we return an error and restore the original input stream + /// state. 
However, if the first token leads us to a valid next state, + /// we may not be able to recover the original stream state on an error. + /// + /// As a result, this should only be called when you're very confident + /// that the next thing is going to be an expression. pub fn parse_base_expression(&mut self) -> Result { if let Ok(v) = self.parse_constant() { return Ok(Expression::Value(v)); @@ -1285,29 +1355,17 @@ impl<'lexer> Parser<'lexer> { match next.token { Token::OpenBrace => { self.save(next); - return self.parse_block(); + self.parse_block() } Token::OpenParen => { let inner = self.parse_expression()?; - let hopefully_close = self - .next()? - .ok_or_else(|| self.bad_eof("looking for close paren to finish expression"))?; - if matches!(hopefully_close.token, Token::CloseParen) { - Ok(inner) - } else { - self.save(hopefully_close.clone()); - Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: hopefully_close.span, - token: hopefully_close.token, - expected: "close paren after expression", - }) - } + self.require_token(Token::CloseParen, "the end of a parenthesized expression")?; + Ok(inner) } Token::TypeName(n) | Token::PrimitiveTypeName(n) => { - let type_name = Name::new(self.to_location(next.span), n); + let type_name = Name::new(self.to_location(next.span.clone()), n); let after_type_name = self.next()?.ok_or_else(|| { self.bad_eof("looking for colon, open brace, or open paren in constructor") })?; @@ -1320,41 +1378,25 @@ impl<'lexer> Parser<'lexer> { fields.push(field); } - let closer = self.next()?.ok_or_else(|| { - self.bad_eof("looking for close brace in structure value") - })?; - if !matches!(closer.token, Token::CloseBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: closer.span, - token: closer.token, - expected: "close brace or comma after field value", - }); - } + let brace = self.require_token(Token::CloseBrace, "end of structure value")?; - Ok(Expression::StructureValue(type_name, fields)) 
+ let sv = StructureExpr { + location: self.to_location(next.span).extend_to(&brace), + type_name, + fields, + }; + + Ok(Expression::Structure(sv)) } - Token::Colon => { - let second_colon = self.next()?.ok_or_else(|| { - self.bad_eof("looking for second colon in enumeration value") - })?; - if !matches!(second_colon.token, Token::Colon) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: second_colon.span, - token: second_colon.token, - expected: "second colon in enumeration value", - }); - } - + Token::DoubleColon => { let vname = self .next()? .ok_or_else(|| self.bad_eof("looking for enumeration value name"))?; - let value_name = match vname.token { + let variant_name = match vname.token { Token::TypeName(s) => { - let loc = self.to_location(vname.span); + let loc = self.to_location(vname.span.clone()); Name::new(loc, s) } @@ -1363,52 +1405,48 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: vname.span, token: vname.token, - expected: "enumeration value name", + expected: "enumeration value name".into(), }); } }; - let arg = if let Some(maybe_paren) = self.next()? { + let (argument, end_loc) = if let Some(maybe_paren) = self.next()? 
{ if matches!(maybe_paren.token, Token::OpenParen) { let expr = self.parse_expression()?; + let closer = self.require_token(Token::CloseParen, "after variant argument")?; - let tok = self.next()?.ok_or_else(|| { - self.bad_eof("looking for close paren after enum value argument") - })?; - if !matches!(tok.token, Token::CloseParen) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: tok.span, - token: tok.token, - expected: "close paren after enum value argument", - }); - } - - Some(Box::new(expr)) + (Some(Box::new(expr)), closer) } else { self.save(maybe_paren); - None + (None, self.to_location(vname.span)) } } else { - None + (None, self.to_location(vname.span)) }; - Ok(Expression::EnumerationValue(type_name, value_name, arg)) + let ev = EnumerationExpr { + location: self.to_location(next.span).extend_to(&end_loc), + type_name, + variant_name, + argument, + }; + + Ok(Expression::Enumeration(ev)) } _ => Err(ParserError::UnexpectedToken { file: self.file.clone(), span: after_type_name.span, token: after_type_name.token, - expected: "colon, open brace, or open paren in constructor", + expected: "colon, open brace, or open paren in constructor".into(), }), } } Token::ValueName(n) | Token::PrimitiveValueName(n) => { let location = self.to_location(next.span); - let name = Name::new(location, n); - Ok(Expression::Reference(name)) + let name = Name::new(location.clone(), n); + Ok(Expression::Reference(location, name)) } _ => { @@ -1417,17 +1455,20 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: next.span, token: next.token, - expected: "some base expression or an open brace", + expected: "some base expression or an open brace".into(), }) } } } + /// Parse a type from the input stream. + /// + /// Obviously, there are a lot of ways for things to not be a valid + /// function type. As it can, this will try to leave things in the + /// original state on an error, but that won't always be possible. 
So + /// it's probably best to only try to call this when you're sure there + /// should be a type sitting there. pub fn parse_type(&mut self) -> Result { - self.parse_function_type() - } - - fn parse_function_type(&mut self) -> Result { let mut args = Vec::new(); while let Ok(t) = self.parse_type_application() { @@ -1439,23 +1480,23 @@ impl<'lexer> Parser<'lexer> { None => { return Err(ParserError::UnacceptableEof { file: self.file.clone(), - place: "parsing function type or type", + place: "parsing function type or type".into(), }); } - Some(t) if args.len() == 0 => return Ok(t), + Some(t) if args.is_empty() => return Ok(t), Some(_) => { return Err(ParserError::UnacceptableEof { file: self.file.clone(), - place: "looking for '->' in function type", + place: "looking for '->' in function type".into(), }); } } }; if maybe_arrow.token == Token::Arrow { - let right = self.parse_function_type()?; + let right = self.parse_type()?; Ok(Type::Function(args, Box::new(right))) } else if args.len() == 1 { self.save(maybe_arrow); @@ -1468,11 +1509,23 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span, token, - expected: "'->' in function type", + expected: "'->' in function type".into(), }) } } + /// Parse a type application. + /// + /// Type applications must start with a type name (a capitalized variable + /// name). If we don't find one, we immediately error out. However if we + /// do find one, we will then eat as many base types as we can until we + /// run into an error. + /// + /// If we don't find a type name immediately, we will return an error but + /// leave the parse stream unchanged. If we parse a bunch of base types + /// correctly, the stream will be left at the start of the first non-base-type + /// token. However, this function can leave things in a weird state if there + /// is an open parenthesis that tries to enclose something that's not a type. 
fn parse_type_application(&mut self) -> Result { let LocatedToken { token, span } = self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; @@ -1501,6 +1554,14 @@ impl<'lexer> Parser<'lexer> { Ok(Type::Application(Box::new(constructor), args)) } + /// Parse a base type from the input stream. + /// + /// A "base type" is a type variable, a primitive type name, a type name, + /// or a parenthesized version of some other type. This function will return + /// an error if it can't find one of these things, and will *attempt* to + /// return the stream unmodified in the event of an error. However, if it + /// sees a parenthesis and tries to parse a nested, complex type, it may + /// not be possible to recover the state precisely. fn parse_base_type(&mut self) -> Result { let LocatedToken { token, span } = self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; @@ -1529,7 +1590,7 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: closer.span, token: closer.token, - expected: "close parenthesis to finish a type", + expected: "close parenthesis to finish a type".into(), }); } @@ -1545,13 +1606,17 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span, token, - expected: "type constructor, type variable, or primitive type", + expected: "type constructor, type variable, or primitive type".into(), }) } } } - pub fn parse_constant(&mut self) -> Result { + /// Try to parse a constant value from the input stream. + /// + /// If we don't find a name, the stream should be returned in the same state + /// at which it entered this function. + pub(crate) fn parse_constant(&mut self) -> Result { let maybe_constant = self .next()? .ok_or_else(|| self.bad_eof("looking for a constant"))?; @@ -1575,9 +1640,53 @@ impl<'lexer> Parser<'lexer> { file: self.file.clone(), span: maybe_constant.span, token: maybe_constant.token, - expected: "constant value", + expected: "constant value".into(), }) } } } + + /// Try to parse a name from the input stream. 
+ /// + /// If we don't find a name, the stream should be returned in the same state + /// at which it entered this function. + fn parse_name(&mut self, place: &'static str) -> Result { + let maybe_name = self + .next()? + .ok_or_else(|| self.bad_eof(format!("looking for a name in {place}")))?; + + if let Token::ValueName(x) = maybe_name.token { + Ok(Name::new(self.to_location(maybe_name.span), x)) + } else { + self.save(maybe_name.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_name.span, + token: maybe_name.token, + expected: format!("looking for a name in {place}"), + }) + } + } + + /// Try to parse a type name from the input stream. + /// + /// If we don't find a name, the stream should be returned in the same state + /// at which it entered this function. + fn parse_type_name(&mut self, place: &'static str) -> Result { + let maybe_name = self + .next()? + .ok_or_else(|| self.bad_eof(format!("looking for a type name in {place}")))?; + + if let Token::TypeName(x) = maybe_name.token { + Ok(Name::new(self.to_location(maybe_name.span), x)) + } else { + self.save(maybe_name.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_name.span, + token: maybe_name.token, + expected: format!("looking for a type name in {place}"), + }) + } + } } diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index ef0b042..a849689 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -151,7 +151,7 @@ fn type_restrictions() { matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); - assert!(matches!(parse_tr("restrict(,Cons a b,)"), Err(_))); + assert!(parse_tr("restrict(,Cons a b,)").is_err()); assert!(matches!( parse_tr("restrict(Cons a b, Monad m)"), @@ -196,7 +196,7 @@ fn field_definition() { result.parse_field_definition() }; - assert!(matches!(parse_fd("foo"), Err(_),)); + 
assert!(parse_fd("foo").is_err()); assert!(matches!( parse_fd("foo,"), Ok(Some(StructureField{ name, export: ExportClass::Private, field_type: None, .. })) @@ -250,10 +250,11 @@ fn structures() { result.parse_structure() }; - assert!(matches!(parse_st("structure { }"), Err(_))); - assert!(matches!(parse_st("structure {"), Err(_))); - assert!(matches!(parse_st("structure foo {}"), Err(_))); + assert!(parse_st("structure { }").is_err()); + assert!(parse_st("structure {").is_err()); + assert!(parse_st("structure foo {}").is_err()); + println!("result: {:?}", parse_st("structure Foo {}")); assert!(matches!( parse_st("structure Foo {}"), Ok(StructureDef { name, fields, .. }) @@ -264,7 +265,7 @@ fn structures() { Ok(StructureDef { name, fields, .. }) if name.as_printed() == "Foo" && matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }] - if name.as_printed() == "bar" && matches!(field_type, None)))); + if name.as_printed() == "bar" && field_type.is_none()))); assert!(matches!( parse_st("structure Foo { bar: Word8 }"), @@ -326,10 +327,10 @@ fn enum_variant() { result.parse_enum_variant() }; - assert!(matches!(parse_ev("foo"), Err(_),)); - assert!(matches!(parse_ev("foo,"), Err(_),)); - assert!(matches!(parse_ev("Cons foo,"), Err(_),)); - assert!(matches!(parse_ev(""), Err(_))); + assert!(matches!(parse_ev("foo"), Ok(None))); + assert!(matches!(parse_ev("foo,"), Ok(None))); + assert!(parse_ev("Cons foo,").is_err()); + assert!(matches!(parse_ev(""), Ok(None))); assert!(matches!(parse_ev("}"), Ok(None))); @@ -383,9 +384,9 @@ fn enumerations() { result.parse_enumeration() }; - assert!(matches!(parse_en("enumeration { }"), Err(_))); - assert!(matches!(parse_en("enumeration {"), Err(_))); - assert!(matches!(parse_en("enumeration"), Err(_))); + assert!(parse_en("enumeration { }").is_err()); + assert!(parse_en("enumeration {").is_err()); + assert!(parse_en("enumeration").is_err()); assert!(matches!( parse_en("enumeration Empty { }"), @@ -419,13 +420,13 @@ 
fn expressions() { result.parse_expression() }; - assert!(matches!(parse_ex(""), Err(_))); + assert!(parse_ex("").is_err()); assert!(matches!( parse_ex("x"), - Ok(Expression::Reference(n)) if n.as_printed() == "x")); + Ok(Expression::Reference(_,n)) if n.as_printed() == "x")); assert!(matches!( parse_ex("(x)"), - Ok(Expression::Reference(n)) if n.as_printed() == "x")); + Ok(Expression::Reference(_,n)) if n.as_printed() == "x")); assert!(matches!( parse_ex("'c'"), Ok(Expression::Value(ConstantValue::Character(_, _))) @@ -452,17 +453,19 @@ fn enumeration_values() { result.parse_expression() }; - assert!(matches!(parse_ex("Hello::world"), Err(_))); + assert!(parse_ex("Hello::world").is_err()); assert!(matches!( parse_ex("Hello::World"), - Ok(Expression::EnumerationValue(t, v, None)) - if t.as_printed() == "Hello" && - v.as_printed() == "World")); + Ok(Expression::Enumeration(ev)) + if ev.type_name.as_printed() == "Hello" && + ev.variant_name.as_printed() == "World" && + ev.argument.is_none())); assert!(matches!( parse_ex("Hello::World(a)"), - Ok(Expression::EnumerationValue(t, v, Some(_))) - if t.as_printed() == "Hello" && - v.as_printed() == "World")); + Ok(Expression::Enumeration(ev)) + if ev.type_name.as_printed() == "Hello" && + ev.variant_name.as_printed() == "World" && + ev.argument.is_some())); } #[test] @@ -473,29 +476,30 @@ fn structure_value() { result.parse_expression() }; - assert!(matches!(parse_st("Foo{ , }"), Err(_))); - assert!(matches!(parse_st("Foo{ foo, }"), Err(_))); - assert!(matches!(parse_st("Foo{ foo: , }"), Err(_))); - assert!(matches!(parse_st("Foo{ , foo: 1, }"), Err(_))); + assert!(parse_st("Foo{ , }").is_err()); + assert!(parse_st("Foo{ foo, }").is_err()); + assert!(parse_st("Foo{ foo: , }").is_err()); + assert!(parse_st("Foo{ , foo: 1, }").is_err()); + println!("result: {:?}", parse_st("Foo{ foo: 1 }")); assert!(matches!( parse_st("Foo{ foo: 1 }"), - Ok(Expression::StructureValue(sname, values)) - if sname.as_printed() == "Foo" && - 
matches!(values.as_slice(), [FieldValue{ field, value }] + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field, value }] if field.as_printed() == "foo" && matches!(value, Expression::Value(ConstantValue::Integer(_,_)))))); assert!(matches!( parse_st("Foo{ foo: 1, }"), - Ok(Expression::StructureValue(sname, values)) - if sname.as_printed() == "Foo" && - matches!(values.as_slice(), [FieldValue{ field, value }] + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field, value }] if field.as_printed() == "foo" && matches!(value, Expression::Value(ConstantValue::Integer(_,_)))))); assert!(matches!( parse_st("Foo{ foo: 1, bar: \"foo\" }"), - Ok(Expression::StructureValue(sname, values)) - if sname.as_printed() == "Foo" && - matches!(values.as_slice(), [FieldValue{ field: f1, value: v1 }, + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field: f1, value: v1 }, FieldValue{ field: f2, value: v2 }] if f1.as_printed() == "foo" && f2.as_printed() == "bar" && @@ -503,15 +507,15 @@ fn structure_value() { matches!(v2, Expression::Value(ConstantValue::String(_,_)))))); assert!(matches!( parse_st("Foo{ foo: 1, bar: \"foo\", }"), - Ok(Expression::StructureValue(sname, values)) - if sname.as_printed() == "Foo" && - matches!(values.as_slice(), [FieldValue{ field: f1, value: v1 }, + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field: f1, value: v1 }, FieldValue{ field: f2, value: v2 }] if f1.as_printed() == "foo" && f2.as_printed() == "bar" && matches!(v1, Expression::Value(ConstantValue::Integer(_,_))) && matches!(v2, Expression::Value(ConstantValue::String(_,_)))))); - assert!(matches!(parse_st("Foo{ foo: 1,, bar: \"foo\", }"), Err(_))); + assert!(parse_st("Foo{ foo: 1,, bar: \"foo\", }").is_err()); } 
#[test] @@ -539,7 +543,7 @@ fn infix_and_precedence() { assert!(matches!( parse_ex("1 + 2"), Ok(Expression::Call(plus, CallKind::Infix, args)) - if matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + if matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })) @@ -547,12 +551,12 @@ fn infix_and_precedence() { assert!(matches!( parse_ex("1 + 2 + 3"), Ok(Expression::Call(plus, CallKind::Infix, args)) - if matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + if matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(args.as_slice(), [ Expression::Call(innerplus, CallKind::Infix, inner_args), Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) ] if *v3 == 3 && - matches!(innerplus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(innerplus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(inner_args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })) @@ -560,12 +564,12 @@ fn infix_and_precedence() { assert!(matches!( parse_ex("1 * 2 * 3"), Ok(Expression::Call(times, CallKind::Infix, args)) - if matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + if matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && matches!(args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. 
})), Expression::Call(innertimes, CallKind::Infix, inner_args), ] if *v1 == 1 && - matches!(innertimes.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(innertimes.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && matches!(inner_args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })), Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) @@ -574,19 +578,19 @@ fn infix_and_precedence() { assert!(matches!( parse_ex("1 + 2 * 3 + 4"), Ok(Expression::Call(plus_right, CallKind::Infix, outer_args)) if - matches!(plus_right.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus_right.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(outer_args.as_slice(), [ Expression::Call(plus_left, CallKind::Infix, left_args), Expression::Value(ConstantValue::Integer(_, v4)) ] if matches!(v4, IntegerWithBase{ value: 4, .. }) && - matches!(plus_left.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus_left.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(left_args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, v1)), Expression::Call(times, CallKind::Infix, times_args) ] if matches!(v1, IntegerWithBase{ value: 1, .. 
}) && - matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && matches!(times_args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, v2)), Expression::Value(ConstantValue::Integer(_, v3)) @@ -597,13 +601,13 @@ fn infix_and_precedence() { assert!(matches!( parse_ex("1 * 2 + 3 * 4"), Ok(Expression::Call(plus, CallKind::Infix, outer_args)) if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(outer_args.as_slice(), [ Expression::Call(left_times, CallKind::Infix, left_args), Expression::Call(right_times, CallKind::Infix, right_args) ] if - matches!(left_times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && - matches!(right_times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(left_times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(right_times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && matches!(left_args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, v1)), Expression::Value(ConstantValue::Integer(_, v2)), @@ -631,95 +635,95 @@ fn calls() { assert!(matches!( parse_ex("f()"), Ok(Expression::Call(f, CallKind::Normal, args)) if - matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && args.is_empty())); assert!(matches!( parse_ex("f(a)"), Ok(Expression::Call(f, CallKind::Normal, args)) if - matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && - matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a"))); + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a"))); assert!(matches!( 
parse_ex("f(a,b)"), Ok(Expression::Call(f, CallKind::Normal, args)) if - matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && matches!(args.as_slice(), [ - Expression::Reference(a), - Expression::Reference(b), + Expression::Reference(_,a), + Expression::Reference(_,b), ] if a.as_printed() == "a" && b.as_printed() == "b"))); assert!(matches!( parse_ex("f(a,b,)"), Ok(Expression::Call(f, CallKind::Normal, args)) if - matches!(f.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && matches!(args.as_slice(), [ - Expression::Reference(a), - Expression::Reference(b), + Expression::Reference(_,a), + Expression::Reference(_,b), ] if a.as_printed() == "a" && b.as_printed() == "b"))); - assert!(matches!(parse_ex("f(,a,b,)"), Err(_))); - assert!(matches!(parse_ex("f(a,,b,)"), Err(_))); - assert!(matches!(parse_ex("f(a,b,,)"), Err(_))); + assert!(parse_ex("f(,a,b,)").is_err()); + assert!(parse_ex("f(a,,b,)").is_err()); + assert!(parse_ex("f(a,b,,)").is_err()); assert!(matches!( parse_ex("f()()"), Ok(Expression::Call(f, CallKind::Normal, args)) if matches!(f.as_ref(), Expression::Call(inner, CallKind::Normal, inner_args) if - matches!(inner.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(inner.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && inner_args.is_empty()) && args.is_empty())); assert!(matches!( parse_ex("f() + 1"), Ok(Expression::Call(plus, CallKind::Infix, args)) if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(args.as_slice(), [ Expression::Call(subcall, CallKind::Normal, subargs), Expression::Value(ConstantValue::Integer(_, v1)) ] if matches!(v1, IntegerWithBase{ value: 1, .. 
}) && - matches!(subcall.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(subcall.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && subargs.is_empty()))); assert!(matches!( parse_ex("f(a + b, c*d)"), Ok(Expression::Call(eff, CallKind::Normal, args)) if - matches!(eff.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(eff.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && matches!(args.as_slice(), [ Expression::Call(plus, CallKind::Infix, pargs), Expression::Call(times, CallKind::Infix, targs), ] if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && - matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && - matches!(pargs.as_slice(), [ Expression::Reference(a), Expression::Reference(b) ] if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(pargs.as_slice(), [ Expression::Reference(_,a), Expression::Reference(_,b) ] if a.as_printed() == "a" && b.as_printed() == "b") && - matches!(targs.as_slice(), [ Expression::Reference(c), Expression::Reference(d) ] if + matches!(targs.as_slice(), [ Expression::Reference(_,c), Expression::Reference(_,d) ] if c.as_printed() == "c" && d.as_printed() == "d")))); assert!(matches!( parse_ex("f(a + b, c*d,)"), Ok(Expression::Call(eff, CallKind::Normal, args)) if - matches!(eff.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(eff.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && matches!(args.as_slice(), [ Expression::Call(plus, CallKind::Infix, pargs), Expression::Call(times, CallKind::Infix, targs), ] if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && - matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && - matches!(pargs.as_slice(), [ Expression::Reference(a), 
Expression::Reference(b) ] if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(pargs.as_slice(), [ Expression::Reference(_,a), Expression::Reference(_,b) ] if a.as_printed() == "a" && b.as_printed() == "b") && - matches!(targs.as_slice(), [ Expression::Reference(c), Expression::Reference(d) ] if + matches!(targs.as_slice(), [ Expression::Reference(_,c), Expression::Reference(_,d) ] if c.as_printed() == "c" && d.as_printed() == "d")))); assert!(matches!( parse_ex("3 + f(1 + 2)"), Ok(Expression::Call(plus, CallKind::Infix, args)) if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(args.as_slice(), [ Expression::Value(ConstantValue::Integer(_, v3)), Expression::Call(eff, CallKind::Normal, fargs) ] if matches!(v3, IntegerWithBase{ value: 3, .. }) && - matches!(eff.as_ref(), Expression::Reference(n) if n.as_printed() == "f") && + matches!(eff.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && matches!(fargs.as_slice(), [Expression::Call(p, CallKind::Infix, pargs)] if - matches!(p.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(p.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(pargs.as_slice(), [Expression::Value(v1), Expression::Value(v2)] if matches!(v1, ConstantValue::Integer(_, IntegerWithBase { value: 1, .. })) && matches!(v2, ConstantValue::Integer(_, IntegerWithBase { value: 2, .. }))))))); @@ -728,11 +732,11 @@ fn calls() { parse_ex("(f . 
g)(1 + 2)"), Ok(Expression::Call(fg, CallKind::Normal, args)) if matches!(fg.as_ref(), Expression::Call(dot, CallKind::Infix, fgargs) if - matches!(dot.as_ref(), Expression::Reference(n) if n.as_printed() == ".") && - matches!(fgargs.as_slice(), [Expression::Reference(f), Expression::Reference(g)] if + matches!(dot.as_ref(), Expression::Reference(_,n) if n.as_printed() == ".") && + matches!(fgargs.as_slice(), [Expression::Reference(_,f), Expression::Reference(_,g)] if f.as_printed() == "f" && g.as_printed() == "g")) && matches!(args.as_slice(), [Expression::Call(plus, CallKind::Infix, pargs)] if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(pargs.as_slice(), [Expression::Value(v1), Expression::Value(v2)] if matches!(v1, ConstantValue::Integer(_, IntegerWithBase{ value: 1, .. })) && matches!(v2, ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. })))))); @@ -740,19 +744,19 @@ fn calls() { assert!(matches!( parse_ex("a + b(2 + 3) * c"), Ok(Expression::Call(plus, CallKind::Infix, pargs)) if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(pargs.as_slice(), [ - Expression::Reference(a), + Expression::Reference(_,a), Expression::Call(times, CallKind::Infix, targs) ] if a.as_printed() == "a" && - matches!(times.as_ref(), Expression::Reference(n) if n.as_printed() == "*") && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && matches!(targs.as_slice(), [ Expression::Call(b, CallKind::Normal, bargs), - Expression::Reference(c), + Expression::Reference(_,c), ] if c.as_printed() == "c" && - matches!(b.as_ref(), Expression::Reference(n) if n.as_printed() == "b") && + matches!(b.as_ref(), Expression::Reference(_,n) if n.as_printed() == "b") && matches!(bargs.as_slice(), [Expression::Call(plus, 
CallKind::Infix, pargs)] if - matches!(plus.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(pargs.as_slice(), [ Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. })), Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 3, .. })) @@ -776,56 +780,56 @@ fn prefix_and_postfix() { assert!(matches!( parse_ex("++a"), Ok(Expression::Call(pp, CallKind::Prefix, args)) if - matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && - matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a"))); + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a"))); assert!(matches!( parse_ex("a--"), Ok(Expression::Call(pp, CallKind::Postfix, args)) if - matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "--") && - matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a"))); + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a"))); // the prefix is weaker than the postfix, so it should be the outside // operatotr assert!(matches!( parse_ex("++a--"), Ok(Expression::Call(pp, CallKind::Prefix, args)) if - matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && matches!(args.as_slice(), [Expression::Call(mm, CallKind::Postfix, args)] if - matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "--") && - matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a")))); + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a")))); 
// the prefix is stronger than the postfix, so it should be the inside // operator assert!(matches!( parse_ex("--a++"), Ok(Expression::Call(pp, CallKind::Postfix, args)) if - matches!(pp.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && matches!(args.as_slice(), [Expression::Call(mm, CallKind::Prefix, args)] if - matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "--") && - matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a")))); + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a")))); assert!(matches!( parse_ex("a++ + b"), Ok(Expression::Call(p, CallKind::Infix, args)) if - matches!(p.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(p.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(args.as_slice(), [ Expression::Call(mm, CallKind::Postfix, args), - Expression::Reference(n) + Expression::Reference(_,n) ] if n.as_printed() == "b" && - matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && - matches!(args.as_slice(), [Expression::Reference(n)] if n.as_printed() == "a")))); + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a")))); assert!(matches!( parse_ex("a + ++ b"), Ok(Expression::Call(p, CallKind::Infix, args)) if - matches!(p.as_ref(), Expression::Reference(n) if n.as_printed() == "+") && + matches!(p.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && matches!(args.as_slice(), [ - Expression::Reference(n), + Expression::Reference(_,n), Expression::Call(mm, CallKind::Prefix, args), ] if n.as_printed() == "a" && - matches!(mm.as_ref(), Expression::Reference(n) if n.as_printed() == "++") && - matches!(args.as_slice(), 
[Expression::Reference(n)] if n.as_printed() == "b")))); + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "b")))); assert!(matches!( parse_ex("a * ++ b"), @@ -846,29 +850,29 @@ fn blocks() { Ok(Expression::Block(_, void)) if matches!(void.as_slice(), [Statement::Expression(call)] if matches!(call, Expression::Call(void, CallKind::Normal, vargs) if - matches!(void.as_ref(), Expression::Reference(n) if + matches!(void.as_ref(), Expression::Reference(_,n) if n.as_printed() == "%prim%void") && vargs.is_empty())))); assert!(matches!( parse_ex("{ x }"), Ok(Expression::Block(_, x)) if - matches!(x.as_slice(), [Statement::Expression(Expression::Reference(n))] if + matches!(x.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if n.as_printed() == "x"))); assert!(matches!( parse_ex("{ x; }"), Ok(Expression::Block(_, x)) if matches!(x.as_slice(), [ - Statement::Expression(Expression::Reference(n)), + Statement::Expression(Expression::Reference(_,n)), Statement::Expression(Expression::Call(primv, CallKind::Normal, vargs)), ] if n.as_printed() == "x" && vargs.is_empty() && - matches!(primv.as_ref(), Expression::Reference(n) if + matches!(primv.as_ref(), Expression::Reference(_,n) if n.as_printed() == "%prim%void")))); assert!(matches!( parse_ex("{ x; y }"), Ok(Expression::Block(_, x)) if matches!(x.as_slice(), [ - Statement::Expression(Expression::Reference(x)), - Statement::Expression(Expression::Reference(y)), + Statement::Expression(Expression::Reference(_,x)), + Statement::Expression(Expression::Reference(_,y)), ] if x.as_printed() == "x" && y.as_printed() == "y"))); } @@ -886,7 +890,7 @@ fn bindings() { matches!(x.as_slice(), [Statement::Binding(b), Statement::Expression(_)] if !b.mutable && b.variable.as_printed() == "x" && - matches!(b.value, Expression::Reference(ref n) if n.as_printed() == "y")))); + matches!(b.value, Expression::Reference(_,ref n) 
if n.as_printed() == "y")))); } #[test] @@ -900,25 +904,25 @@ fn conditionals() { assert!(matches!( parse_ex("if x { y } else { z }"), Ok(Expression::Conditional(cond)) if - matches!(cond.test.as_ref(), Expression::Reference(n) if n.as_printed() == "x") && + matches!(cond.test.as_ref(), Expression::Reference(_,n) if n.as_printed() == "x") && matches!(cond.consequent.as_ref(), Expression::Block(_, cs) if - matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(n))] if + matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if n.as_printed() == "y")) && matches!(cond.alternative.as_ref(), Some(expr) if matches!(expr.as_ref(), Expression::Block(_, ast) if - matches!(ast.as_slice(), [Statement::Expression(Expression::Reference(n))] if + matches!(ast.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if n.as_printed() == "z"))))); assert!(matches!( parse_ex("if x { y }"), Ok(Expression::Conditional(cond)) if - matches!(cond.test.as_ref(), Expression::Reference(n) if n.as_printed() == "x") && + matches!(cond.test.as_ref(), Expression::Reference(_,n) if n.as_printed() == "x") && matches!(cond.consequent.as_ref(), Expression::Block(_, cs) if - matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(n))] if + matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if n.as_printed() == "y")) && cond.alternative.is_none())); - assert!(matches!(parse_ex("if x v { z }"), Err(_))); + assert!(parse_ex("if x v { z }").is_err()); assert!(matches!( parse_ex("if x + y { z }"), diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 13c94fa..15c5e2d 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -27,6 +27,7 @@ pub enum Token { CloseBrace, Semi, Colon, + DoubleColon, Comma, BackTick, Arrow, @@ -61,6 +62,7 @@ impl fmt::Display for Token { Token::CloseBrace => write!(f, "}}"), Token::Semi => write!(f, ";"), Token::Colon => write!(f, ":"), + Token::DoubleColon => write!(f, "::"), 
Token::Comma => write!(f, ","), Token::BackTick => write!(f, "`"), Token::Arrow => write!(f, "->"), @@ -99,10 +101,7 @@ struct LexerState<'a> { impl<'a> From<&'a str> for Lexer<'a> { fn from(value: &'a str) -> Self { - Lexer::Working(LexerState { - stream: value.char_indices(), - buffer: None, - }) + Lexer::new(value) } } @@ -142,8 +141,7 @@ impl<'a> Iterator for Lexer<'a> { impl<'a> LexerState<'a> { fn next_char(&mut self) -> Option<(usize, char)> { - let result = self.buffer.take().or_else(|| self.stream.next()); - result + self.buffer.take().or_else(|| self.stream.next()) } fn stash_char(&mut self, idx: usize, c: char) { @@ -172,7 +170,6 @@ impl<'a> LexerState<'a> { '{' => return simple_response(Token::OpenBrace), '}' => return simple_response(Token::CloseBrace), ';' => return simple_response(Token::Semi), - ':' => return simple_response(Token::Colon), ',' => return simple_response(Token::Comma), '`' => return simple_response(Token::BackTick), '\\' => return simple_response(Token::Lambda(false)), @@ -182,6 +179,7 @@ impl<'a> LexerState<'a> { '\'' => return self.starts_with_single(token_start_offset), '\"' => return self.starts_with_double(token_start_offset), '-' => return self.starts_with_dash(token_start_offset), + ':' => return self.starts_with_colon(token_start_offset), _ => {} } @@ -519,6 +517,31 @@ impl<'a> LexerState<'a> { } } } + + fn starts_with_colon( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + match self.next_char() { + None => Ok(Some(LocatedToken { + token: Token::Colon, + span: token_start_offset..token_start_offset + 1, + })), + + Some((pos, ':')) => Ok(Some(LocatedToken { + token: Token::DoubleColon, + span: token_start_offset..pos, + })), + + Some((pos, char)) => { + self.stash_char(pos, char); + Ok(Some(LocatedToken { + token: Token::Colon, + span: token_start_offset..token_start_offset + 1, + })) + } + } + } } proptest::proptest! 
{ @@ -542,7 +565,7 @@ fn parsed_single_token(s: &str) -> Token { let mut tokens = Lexer::from(s); let result = tokens .next() - .expect(format!("Can get at least one token from {s:?}").as_str()) + .unwrap_or_else(|| panic!("Can get at least one token from {s:?}")) .expect("Can get a valid token.") .token; diff --git a/src/syntax/universe.rs b/src/syntax/universe.rs index 0e60a05..439250e 100644 --- a/src/syntax/universe.rs +++ b/src/syntax/universe.rs @@ -6,21 +6,17 @@ use memmap2::Mmap; use std::collections::HashMap; use std::path::{Path, PathBuf}; +#[derive(Default)] pub struct Universe { pub files: HashMap, pub modules: HashMap, } -impl Default for Universe { - fn default() -> Self { - Universe { - files: HashMap::new(), - modules: HashMap::new(), - } - } -} - impl Universe { + /// Add a file to this universe. + /// + /// This may result in other files being loaded on behalf of the file, if + /// (for example) the given file has imports. pub fn add_file>(&mut self, file: P) -> Result<(), ParserError> { let filename = file.as_ref().to_string_lossy().into_owned(); -- 2.53.0 From 45e49a4c840c68c456efe01fdb40012bb780ba01 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Tue, 11 Nov 2025 11:07:29 -0800 Subject: [PATCH 17/33] This is now tidy for the bits that exst. --- src/syntax/parse.rs | 431 ++++++++++++++----------------------- src/syntax/parser_tests.rs | 7 + 2 files changed, 168 insertions(+), 270 deletions(-) diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index d0c3df1..88357c6 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -185,6 +185,28 @@ impl<'lexer> Parser<'lexer> { } } + /// See if the next token is an operator, as expected. + /// + /// If it isn't, this routine will provide an error, but it will make + /// sure to put the token back into the stream. + fn require_operator(&mut self, op: &'static str) -> Result { + match self.next()? 
{ + None => Err(self.bad_eof(format!("looking for symbol '{op}'"))), + Some(ltoken) => match ltoken.token { + Token::OperatorName(s) if s.as_str() == op => Ok(self.to_location(ltoken.span)), + _ => { + self.save(ltoken.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: ltoken.span, + token: ltoken.token, + expected: format!("symbol {op}"), + }) + } + }, + } + } + /// See if the next token is the given one, as expected. /// /// If it isn't, this routine will provide an error, but it will make @@ -623,7 +645,7 @@ impl<'lexer> Parser<'lexer> { // x => x * fact(x - 1), // } // } - // + // // Or any of many variations of that. Token::OpenParen => { unimplemented!() @@ -668,6 +690,12 @@ impl<'lexer> Parser<'lexer> { } } + /// Parse a single expression out of the input stream. + /// + /// Because expressions can start with so many possible tokens, it's very + /// likely that if you call this, the input stream will be corrupted by any + /// errors this function returns. So you should be careful to only call it + /// in situations that don't require rollback. pub fn parse_expression(&mut self) -> Result { let next = self .next()? @@ -685,63 +713,35 @@ impl<'lexer> Parser<'lexer> { } } + /// Parse a match expression. + /// + /// This function does assume that the next token in the input stream will + /// be the "match" keyword, and will error immediately (albeit, saving the + /// stream) if it isn't. So you *can* use this if you're not sure this is + /// a match expression, and want to escape if it isn't. fn parse_match_expression(&mut self) -> Result { - let next = self - .next()? 
- .ok_or_else(|| self.bad_eof("looking for a 'match' to open a pattern match"))?; - - if !matches!(next.token, Token::ValueName(ref x) if x == "match") { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "an 'match' to start a pattern match".into(), - }); - } - let start = self.to_location(next.span); - + let start = self.require_keyword("match")?; let value = Box::new(self.parse_arithmetic(0)?); - - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for an open brace after 'match'"))?; - if !matches!(next.token, Token::OpenBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "an open brace after the match expression".into(), - }); - } + self.require_token(Token::OpenBrace, "start of a match case list")?; let mut cases = vec![]; - while let Some(case) = self.parse_match_case()? { cases.push(case); } - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for an open brace after 'match'"))?; - if !matches!(next.token, Token::CloseBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "a close brace to end a match expression".into(), - }); - } - let end = self.to_location(next.span); - - let location = start.extend_to(&end); - + let end = self.require_token(Token::CloseBrace, "end of a match case list")?; Ok(MatchExpr { - location, + location: start.extend_to(&end), value, cases, }) } + /// Parse a single match case. + /// + /// A match case consists of a pattern, a double-arrow, and then an expression + /// describing what to do if that pattern matches the expression. It may or may + /// not conclude with a comma. 
fn parse_match_case(&mut self) -> Result, ParserError> { // skip over anything we can just skip loop { @@ -764,18 +764,7 @@ impl<'lexer> Parser<'lexer> { } let pattern = self.parse_pattern()?; - - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for an open brace after 'match'"))?; - if !matches!(next.token, Token::Arrow) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "an arrow after a pattern, as part of a match case".into(), - }); - } + self.require_token(Token::Arrow, "after pattern in match clause")?; let consequent = self.parse_expression()?; @@ -785,6 +774,12 @@ impl<'lexer> Parser<'lexer> { })) } + /// Parse a pattern from the input stream. + /// + /// Patterns are a recursive, complex structure without a clear opening token. + /// So ... you better be sure that you want a pattern when you call this, + /// because you're almost certainly not going to be able to recover and try + /// something else if this breaks. 
pub fn parse_pattern(&mut self) -> Result { if let Ok(constant) = self.parse_constant() { return Ok(Pattern::Constant(constant)); @@ -815,21 +810,10 @@ impl<'lexer> Parser<'lexer> { fields.push(field_pattern) } - let final_brace = self.next()?.ok_or_else(|| { - self.bad_eof("looking for closing brace in structure pattern.") - })?; - if !matches!(final_brace.token, Token::CloseBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: final_brace.span, - token: final_brace.token, - expected: "closing brace in structure pattern".into(), - }); - } - let final_brace_location = self.to_location(final_brace.span); - + let end = + self.require_token(Token::CloseBrace, "after structure pattern")?; let structure_pattern = StructurePattern { - location: start.extend_to(&final_brace_location), + location: start.extend_to(&end), type_name, fields, }; @@ -838,47 +822,18 @@ impl<'lexer> Parser<'lexer> { } Token::DoubleColon => { - let vname = self.next()?.ok_or_else(|| { - self.bad_eof("looking for enumeration value name in pattern") - })?; + let variant_name = + self.parse_type_name("enumeration pattern variant name")?; - let variant_name = match vname.token { - Token::TypeName(s) => { - let loc = self.to_location(vname.span.clone()); - Name::new(loc, s) - } - - _ => { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: vname.span, - token: vname.token, - expected: "enumeration value name in pattern".into(), - }); - } - }; - - let mut final_location = self.to_location(vname.span); + let mut final_location = variant_name.location().unwrap().clone(); let argument = if let Some(maybe_paren) = self.next()? 
{ if matches!(maybe_paren.token, Token::OpenParen) { let sub_pattern = self.parse_pattern()?; - - let tok = self.next()?.ok_or_else(|| { - self.bad_eof( - "looking for close paren after enum value argument", - ) - })?; - if !matches!(tok.token, Token::CloseParen) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: tok.span, - token: tok.token, - expected: "close paren after enum value argument".into(), - }); - } - - final_location = self.to_location(tok.span); + final_location = self.require_token( + Token::CloseParen, + "after enumeration pattern argument", + )?; Some(Box::new(sub_pattern)) } else { @@ -918,6 +873,16 @@ impl<'lexer> Parser<'lexer> { } } + /// Parse a field pattern. + /// + /// For reference, a field pattern is either just the name of a field, or a name of a + /// field plus a colon and some form of subpattern. This can be used to either rename + /// a field or to only match when a field has a particular value. + /// + /// Regardless, this should start with a name, and if it doesn't start with a name, + /// we'll return Ok(None) to indicate that we're done parsing field patterns. If we + /// do get a name and then reach some sort of error, though, who knows what state we'll + /// end up in. fn parse_field_pattern(&mut self) -> Result)>, ParserError> { let next = self .next()? @@ -986,69 +951,42 @@ impl<'lexer> Parser<'lexer> { Ok(Some((name, sub_pattern))) } + /// Parse an if expression. + /// + /// Like many of these functions, there's a nice indicator immediately available to us + /// so that we know whether or not this is an if statement. If we don't see it, we will + /// return with an error but the input stream will be clean. However, if we do see one, + /// and there's an error down the line, then there's nothing we can do. fn parse_if_expression(&mut self) -> Result { - let next = self - .next()? 
- .ok_or_else(|| self.bad_eof("looking for an 'if' to start conditional"))?; - if !matches!(next.token, Token::ValueName(ref x) if x == "if") { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "an 'if' to start a conditional".into(), - }); - } - let start = self.to_location(next.span); - + let start = self.require_keyword("if")?; let test = self.parse_arithmetic(0)?; let consequent = self.parse_block()?; + let mut alternative = None; - let maybe_else = self.next()?; - let (alternative, location) = match maybe_else { - Some(LocatedToken { - token: Token::ValueName(ref n), - .. - }) if n == "else" => { - let expr = self.parse_block()?; - let location = match expr { - Expression::Block(ref l, _) => l.clone(), - _ => panic!("How did parse_block not return a block?!"), - }; + if self.require_keyword("else").is_ok() { + alternative = Some(Box::new(self.parse_block()?)); + } - (Some(Box::new(expr)), location) - } - - _ => { - let location = match consequent { - Expression::Block(ref l, _) => l.clone(), - _ => panic!("How did parse_block not return a block?!"), - }; - - (None, location) - } - }; + let end = alternative + .as_ref() + .map(|x| x.location()) + .unwrap_or_else(|| consequent.location()); Ok(ConditionalExpr { - location: start.extend_to(&location), + location: start.extend_to(&end), test: Box::new(test), consequent: Box::new(consequent), alternative, }) } + /// Parse a block. + /// + /// A block starts with an open brace -- so if we don't see one, we'll exit cleanly -- + /// but gets real complicated after that. So, once again, be thoughtful about how this + /// is called. pub fn parse_block(&mut self) -> Result { - let next = self - .next()? 
- .ok_or_else(|| self.bad_eof("looking for open brace to start block"))?; - if !matches!(next.token, Token::OpenBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "an open brace to start a block".into(), - }); - } - let start = self.to_location(next.span); + let start = self.require_token(Token::OpenBrace, "start of a block")?; let mut statements = vec![]; let mut ended_with_expr = false; @@ -1061,18 +999,7 @@ impl<'lexer> Parser<'lexer> { } } - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for statement or block close"))?; - if !matches!(next.token, Token::CloseBrace) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "a close brace to end a block".into(), - }); - } - let end = self.to_location(next.span); + let end = self.require_token(Token::CloseBrace, "end of a block")?; if !ended_with_expr { let void_name = Name::new(end.clone(), "%prim%void"); @@ -1084,104 +1011,61 @@ impl<'lexer> Parser<'lexer> { Ok(Expression::Block(start.extend_to(&end), statements)) } + /// Parse a statement, or return None if we're now done with parsing a block. + /// + /// We know we're done parsing a block when we hit a close brace, basically. We + /// should ignore excess semicolons cleanly, and that sort of thing. Because + /// statements vary pretty widely, you should not assume that the input is clean + /// on any sort of error. pub fn parse_statement(&mut self) -> Result, ParserError> { - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for a statement or close brace"))?; + loop { + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for a statement or close brace"))?; - match next.token { - Token::CloseBrace => { - self.save(next); - Ok(None) - } - - Token::ValueName(ref l) if l == "let" => { - self.save(next); - Ok(Some((Statement::Binding(self.parse_let()?), false))) - } - - _ => { - self.save(next); - let expr = Statement::Expression(self.parse_expression()?); - - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for semicolon or close brace"))?; - - if matches!(next.token, Token::Semi) { - Ok(Some((expr, false))) - } else { + match next.token { + Token::CloseBrace => { self.save(next); - Ok(Some((expr, true))) + return Ok(None); + } + + Token::Semi => continue, + + Token::ValueName(ref l) if l == "let" => { + self.save(next); + return Ok(Some((Statement::Binding(self.parse_let()?), false))); + } + + _ => { + self.save(next); + let expr = Statement::Expression(self.parse_expression()?); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for semicolon or close brace"))?; + + if matches!(next.token, Token::Semi) { + return Ok(Some((expr, false))); + } else { + self.save(next); + return Ok(Some((expr, true))); + } } } } } + /// Parse a let statement. + /// + /// This will assume that the first token in the stream is a "let", and be upset if + /// it is not. However, it will be upset cleanly, which is nice. pub fn parse_let(&mut self) -> Result { - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for a let for a binding statement"))?; - if !matches!(next.token, Token::ValueName(ref n) if n == "let") { - self.save(next.clone()); - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "a 'let' to open a binding statement".into(), - }); - } - let start = self.to_location(next.span); - - let next = self - .next()? 
- .ok_or_else(|| self.bad_eof("'mut' or a variable name"))?; - let mutable = matches!(next.token, Token::ValueName(ref n) if n == "mut"); - if !mutable { - self.save(next); - } - - let next = self - .next()? - .ok_or_else(|| self.bad_eof("a variable name"))?; - let variable = match next.token { - Token::ValueName(v) => Name::new(self.to_location(next.span), v), - _ => { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "a variable name for the let binding".into(), - }); - } - }; - - let next = self - .next()? - .ok_or_else(|| self.bad_eof("an '=' after a variable name in a binding"))?; - if !matches!(next.token, Token::OperatorName(ref x) if x == "=") { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "an '=' after the variable name in a let binding".into(), - }); - } - + let start = self.require_keyword("let")?; + let mutable = self.require_keyword("mut").is_ok(); + let variable = self.parse_name("let binding")?; + let _ = self.require_operator("=")?; let value = self.parse_expression()?; - - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for terminal semicolon for let statement"))?; - if !matches!(next.token, Token::Semi) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "a semicolon to finish a let statement".into(), - }); - } - let end = self.to_location(next.span); + let end = self.require_token(Token::Semi, "let statement")?; Ok(BindingStmt { location: start.extend_to(&end), @@ -1191,6 +1075,17 @@ impl<'lexer> Parser<'lexer> { }) } + /// Parse an arithmetic expression, obeying the laws of precedence. + /// + /// This is an implementation of Pratt Parsing, although I've probably done it in + /// a much more awkward way than necessary. 
I was heavily inspired and/or stole + /// code directly from [this + /// article](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html), + /// which was instrumental in its design. All errors mine. + /// + /// Note that because arithmetic expressions can start with so many tokens, you + /// should only call this function if you are absolutely sure that there's an + /// expression waiting for you, and it would be an error if there wasn't. pub fn parse_arithmetic(&mut self, level: u8) -> Result { // start by checking for prefix operators. let next = self @@ -1277,20 +1172,14 @@ impl<'lexer> Parser<'lexer> { Ok(lhs) } + /// Parse the arguments to a function call. + /// + /// We assume that, at this point, you have eaten the thing you're calling out of + /// the input stream, and are on the parenthesis that defines the arguments to the + /// function. If you're not there, then this will error, but in a way that you can + /// recover from. fn parse_call_arguments(&mut self) -> Result, ParserError> { - let next = self - .next()? - .ok_or_else(|| self.bad_eof("looking for open paren for function arguments"))?; - - if !matches!(next.token, Token::OpenParen) { - return Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "open paren for call arguments".into(), - }); - } - + let _ = self.require_token(Token::OpenParen, "for function arguments")?; let mut args = vec![]; loop { @@ -1378,7 +1267,8 @@ impl<'lexer> Parser<'lexer> { fields.push(field); } - let brace = self.require_token(Token::CloseBrace, "end of structure value")?; + let brace = + self.require_token(Token::CloseBrace, "end of structure value")?; let sv = StructureExpr { location: self.to_location(next.span).extend_to(&brace), @@ -1413,7 +1303,8 @@ impl<'lexer> Parser<'lexer> { let (argument, end_loc) = if let Some(maybe_paren) = self.next()? 
{ if matches!(maybe_paren.token, Token::OpenParen) { let expr = self.parse_expression()?; - let closer = self.require_token(Token::CloseParen, "after variant argument")?; + let closer = self + .require_token(Token::CloseParen, "after variant argument")?; (Some(Box::new(expr)), closer) } else { diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index a849689..fd7c577 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -867,6 +867,13 @@ fn blocks() { ] if n.as_printed() == "x" && vargs.is_empty() && matches!(primv.as_ref(), Expression::Reference(_,n) if n.as_printed() == "%prim%void")))); + assert!(matches!( + parse_ex("{ x;;; y }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [ + Statement::Expression(Expression::Reference(_,x)), + Statement::Expression(Expression::Reference(_,y)), + ] if x.as_printed() == "x" && y.as_printed() == "y"))); assert!(matches!( parse_ex("{ x; y }"), Ok(Expression::Block(_, x)) if -- 2.53.0 From c7951726920c97a54a3797ab7c72f98d1c450434 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Tue, 11 Nov 2025 13:42:19 -0800 Subject: [PATCH 18/33] All the unimplementeds are gone! --- src/syntax/ast.rs | 3 +- src/syntax/parse.rs | 184 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 177 insertions(+), 10 deletions(-) diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 3d88c33..01b3a90 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -90,14 +90,15 @@ pub struct FunctionArg { pub struct ValueDef { pub name: Name, pub location: Location, + pub mtype: Option, pub value: Expression, } #[derive(Debug)] pub struct OperatorDef { pub operator_name: Name, + pub location: Location, pub function_name: Name, - location: Location, } #[derive(Debug)] diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index 88357c6..b0527ea 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -36,6 +36,14 @@ pub enum Associativity { None, } +/// The kind of operators we use. 
This is only narrowly useful inside +/// this particular crate. +enum OperatorType { + Prefix, + Infix, + Postfix, +} + impl<'lexer> Parser<'lexer> { /// Create a new parser from the given file index and lexer. /// @@ -614,12 +622,86 @@ impl<'lexer> Parser<'lexer> { /// to; that variable can be declared further on in the file or even in another module, /// as we won't try to resolve it until later. /// - /// Like most definitions, we'll abort cleanly if the first token isn't the "operator" - /// keyword, but all bets are off after that. + /// Like most definitions, we'll abort cleanly if the first token isn't "operator", + /// "infix", "postfix", or "prefix" keywords, but all bets are off after that. pub fn parse_operator(&mut self) -> Result { - let _operator = self.require_keyword("operator")?; + let (start, operator_type, associativity) = { + let mut optype = OperatorType::Infix; + let mut start = None; + let mut assoc = Associativity::None; - unimplemented!() + if let Ok(loc) = self.require_keyword("prefix") { + optype = OperatorType::Prefix; + start = Some(loc); + } else if let Ok(loc) = self.require_keyword("postfix") { + optype = OperatorType::Postfix; + start = Some(loc); + } else if let Ok(loc) = self.require_keyword("infix") { + start = Some(loc); + + if self.require_keyword("right").is_ok() { + assoc = Associativity::Right; + } else if self.require_keyword("left").is_ok() { + assoc = Associativity::Left; + } + } + + let oploc = self.require_keyword("operator")?; + (start.unwrap_or(oploc), optype, assoc) + }; + let operator_name = self.parse_operator_name("operator definition")?; + + let level = if self.require_keyword("at").is_ok() { + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("precedence value in operator definition"))?; + + match next.token { + Token::Integer(int_with_base) if int_with_base.value < 10 => { + int_with_base.value as u8 + } + + Token::Integer(ref int_with_base) => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token.clone(), + expected: format!( + "number defining operator precedence ({} is too large", + int_with_base.value + ), + }); + } + + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "number defining operator precedence".into(), + }); + } + } + } else { + 5 + }; + + let function_name = self.parse_name("operator function definition")?; + let end = self.require_token(Token::Semi, "end of operator definition")?; + + match operator_type { + OperatorType::Infix => { + self.add_infix_precedence(operator_name.as_printed(), associativity, level) + } + OperatorType::Prefix => self.add_prefix_precedence(operator_name.as_printed(), level), + OperatorType::Postfix => self.add_postfix_precedence(operator_name.as_printed(), level), + } + + Ok(OperatorDef { + location: start.extend_to(&end), + operator_name, + function_name, + }) } /// Parse a function or a value. @@ -648,33 +730,63 @@ impl<'lexer> Parser<'lexer> { // // Or any of many variations of that. Token::OpenParen => { - unimplemented!() + self.save(next); + let arguments = self.parse_function_def_arguments()?; + let mut return_type = None; + + if self.require_token(Token::Colon, "return type").is_ok() { + return_type = Some(self.parse_type()?); + } + + let Expression::Block(end, body) = self.parse_block()? 
else { + panic!("parse_block returned something that wasn't a block."); + }; + + Ok(Def::Function(FunctionDef { + name, + location: start.extend_to(&end), + arguments, + return_type, + body, + })) } // If we see a colon, then someone's giving us a type for what is probably // some form of simple constant, such as: // - // foo : Int = 4 + // foo : Int = 4; // // But honestly, there's a lot of odd possibilities of complicated things // they could write there. Token::Colon => { - unimplemented!() + let value_type = self.parse_type()?; + let _ = self.require_operator("=")?; + let value = self.parse_expression()?; + let end = self.require_token(Token::Semi, "at end of definition")?; + + Ok(Def::Value(ValueDef { + name, + location: start.extend_to(&end), + mtype: Some(value_type), + value, + })) } // If we see an equal sign, we're jumping right to the value part of the // definition, and we're doing something like this: // - // foo = 4 + // foo = 4; // // Again, though, you could write all sorts of interesting things after // that. Token::OperatorName(eq) if eq == "=" => { let value = self.parse_expression()?; + let end = self.require_token(Token::Semi, "at end of definition")?; Ok(Def::Value(ValueDef { name, - location: start.extend_to(&value.location()), + location: start.extend_to(&end), + mtype: None, value, })) } @@ -690,6 +802,38 @@ impl<'lexer> Parser<'lexer> { } } + /// Parse the arguments to a function declaration. + /// + /// Function arguments should have types, but don't have to. This function assumes + /// that it's starting at the opening parenthesis, and will error (cleanly) if it + /// isn't. + fn parse_function_def_arguments(&mut self) -> Result, ParserError> { + let _ = self.require_token(Token::OpenParen, "start of function argument definition")?; + let mut result = vec![]; + + loop { + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("parsing function arguments"))?; + + if matches!(next.token, Token::CloseParen) { + break; + } + + self.save(next); + let name = self.parse_name("function argument name")?; + let mut arg_type = None; + + if self.require_token(Token::Colon, "").is_ok() { + arg_type = Some(self.parse_type()?); + } + + result.push(FunctionArg { name, arg_type }); + } + + Ok(result) + } + /// Parse a single expression out of the input stream. /// /// Because expressions can start with so many possible tokens, it's very @@ -1580,4 +1724,26 @@ impl<'lexer> Parser<'lexer> { }) } } + + /// Try to parse an operator name from the input stream. + /// + /// If we don't find a name, the stream should be returned in the same state + /// in which it entered this function. + fn parse_operator_name(&mut self, place: &'static str) -> Result {+ let maybe_name = self + .next()? + .ok_or_else(|| self.bad_eof(format!("looking for an operator name in {place}")))?; + + if let Token::OperatorName(x) = maybe_name.token { + Ok(Name::new(self.to_location(maybe_name.span), x)) + } else { + self.save(maybe_name.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_name.span, + token: maybe_name.token, + expected: format!("looking for an operator name in {place}"), + }) + } + } } -- 2.53.0 From 1bc560f684f35baa55fb5e9f7a14344b365de9f7 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Tue, 11 Nov 2025 14:20:28 -0800 Subject: [PATCH 19/33] Almost ... there. 
--- src/syntax/ast.rs | 4 ++ src/syntax/parse.rs | 10 +++++ src/syntax/parser_tests.rs | 83 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 01b3a90..fc78da6 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -232,6 +232,10 @@ impl TypeRestrictions { restrictions: vec![], } } + + pub fn is_empty(&self) -> bool { + self.restrictions.is_empty() + } } #[derive(Debug)] diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index b0527ea..d7cfc06 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -261,6 +261,15 @@ impl<'lexer> Parser<'lexer> { } } + #[allow(unused)] + fn print_next_token(&mut self, comment: &str) { + let token = self.next().expect("can get token"); + println!("[{comment}] next token will be {:?}", token.as_ref().map(|x| x.token.clone())); + if let Some(token) = token { + self.save(token); + } + } + /// Parse a definition in a file (structure, enumeration, value, etc.). /// /// This will read a definition. If there's an error, it's very likely the @@ -394,6 +403,7 @@ impl<'lexer> Parser<'lexer> { let next = self .next()? .ok_or_else(|| self.bad_eof("looking for definition body"))?; + self.save(next.clone()); if let Ok(structure) = self.parse_structure() { return Ok(Def::Structure(structure)); diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index fd7c577..05c3e0a 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -1001,3 +1001,86 @@ fn patterns() { type_name.as_printed() == "Enumeration" && variant_name.as_printed() == "Value")))); } + +#[test] +fn definitions() { + let parse_def = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_definition() + }; + + assert!(matches!( + parse_def("x = 1;"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. 
}) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: None, value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(parse_def("x = 1").is_err()); + assert!(matches!( + parse_def("x: Integer = 1;"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(_), value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("export x: Integer = 1;"), + Ok(Definition { export: ExportClass::Public, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(_), value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("export restrict() x: Integer = 1;"), + Ok(Definition { export: ExportClass::Public, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(_), value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("export restrict(Numeric a) x: a = 1;"), + Ok(Definition { export: ExportClass::Public, type_restrictions, definition, .. 
}) if + matches!(&type_restrictions, TypeRestrictions { restrictions } if + restrictions.len() == 1 && + matches!(restrictions.first(), Some(TypeRestriction{ constructor, arguments }) if + matches!(constructor, Type::Constructor(_, n) if n.as_printed() == "Numeric") && + matches!(arguments.as_slice(), [Type::Variable(_, n)] if n.as_printed() == "a"))) && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(t), value, .. }) if + name.as_printed() == "x" && + matches!(t, Type::Variable(_, n) if n.as_printed() == "a") && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("function() { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_none() && + body.len() == 1))); + assert!(matches!( + parse_def("function() { 1 }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_none() && + body.len() == 1))); + assert!(matches!( + parse_def("function(): Integer { 1 }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_some() && + body.len() == 1))); +} -- 2.53.0 From 90c5d6fef8350e1bc68e876bd341382fddce8911 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Tue, 11 Nov 2025 20:41:58 -0800 Subject: [PATCH 20/33] Tests. 
--- src/syntax/parse.rs | 11 ++-- src/syntax/parser_tests.rs | 103 ++++++++++++++++++++++++++++++++++++- src/syntax/tokens.rs | 26 ++++++++-- 3 files changed, 130 insertions(+), 10 deletions(-) diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index d7cfc06..ae49b4a 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -251,13 +251,12 @@ impl<'lexer> Parser<'lexer> { let mut definitions = vec![]; loop { - let next_token = self.next()?; - - if next_token.is_none() { + if let Some(next_token) = self.next()? { + self.save(next_token); + definitions.push(self.parse_definition()?); + } else { return Ok(Module { definitions }); } - - definitions.push(self.parse_definition()?); } } @@ -696,6 +695,8 @@ impl<'lexer> Parser<'lexer> { 5 }; + let _ = self.require_token(Token::Arrow, "operator definition")?; + let function_name = self.parse_name("operator function definition")?; let end = self.require_token(Token::Semi, "end of operator definition")?; diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index 05c3e0a..2219d7e 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -254,7 +254,6 @@ fn structures() { assert!(parse_st("structure {").is_err()); assert!(parse_st("structure foo {}").is_err()); - println!("result: {:?}", parse_st("structure Foo {}")); assert!(matches!( parse_st("structure Foo {}"), Ok(StructureDef { name, fields, .. 
}) @@ -480,7 +479,6 @@ fn structure_value() { assert!(parse_st("Foo{ foo, }").is_err()); assert!(parse_st("Foo{ foo: , }").is_err()); assert!(parse_st("Foo{ , foo: 1, }").is_err()); - println!("result: {:?}", parse_st("Foo{ foo: 1 }")); assert!(matches!( parse_st("Foo{ foo: 1 }"), Ok(Expression::Structure(sv)) @@ -1084,3 +1082,104 @@ fn definitions() { return_type.is_some() && body.len() == 1))); } + +#[test] +fn operators() { + let parse = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_module() + }; + + let all_the_operators = r#" +prefix operator - -> negate; +postfix operator ++ -> mutable_add; +infix left operator + -> sum; +infix right operator - -> subtract; +infix operator * at 8 -> multiply; +postfix operator ! at 3 -> factorial; +prefix operator $$ at 1 -> money; +"#; + + assert!(parse(all_the_operators).is_ok()); + + assert!(parse("left prefix operator - -> negate;").is_err()); + assert!(parse("right prefix operator - -> negate;").is_err()); + assert!(parse("right infix operator - -> negate;").is_err()); + assert!(parse("left infix operator - -> negate;").is_err()); + assert!(parse("infix operator at 8 - -> negate;").is_err()); + + + // these are designed to replicate the examples in the infix_and_precedence + // tests, but with the precedence set automatically by the parser. + let plus_and_times = |expr| format!(r#" +infix left operator + at 6 -> add; +infix right operator * at 7 -> mul; + +x = {expr}; +"#); + + let plus_example = plus_and_times("1 + 2 + 3"); + assert!(matches!( + parse(&plus_example), + Ok(Module { definitions }) if + matches!(definitions.last(), Some(Definition{ definition, .. }) if + matches!(definition, Def::Value(ValueDef{ value, .. 
}) if + matches!(value, Expression::Call(plus, CallKind::Infix, args) if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(innerplus, CallKind::Infix, inner_args), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v3 == 3 && + matches!(innerplus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })) + ] if *v1 == 1 && *v2 == 2))))))); + + let times_example = plus_and_times("1 * 2 * 3"); + assert!(matches!( + parse(×_example), + Ok(Module { definitions }) if + matches!(definitions.last(), Some(Definition{ definition, .. }) if + matches!(definition, Def::Value(ValueDef{ value, .. }) if + matches!(value, Expression::Call(times, CallKind::Infix, args) if + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Call(innertimes, CallKind::Infix, inner_args), + ] if *v1 == 1 && + matches!(innertimes.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v2 == 2 && *v3 == 3))))))); + + + let mixed_example = plus_and_times("1 + 2 * 3 + 4"); + assert!(matches!( + parse(&mixed_example), + Ok(Module { definitions }) if + matches!(definitions.last(), Some(Definition{ definition, .. }) if + matches!(definition, Def::Value(ValueDef{ value, .. 
}) if + matches!(value, Expression::Call(plus_right, CallKind::Infix, outer_args) if + matches!(plus_right.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(outer_args.as_slice(), [ + Expression::Call(plus_left, CallKind::Infix, left_args), + Expression::Value(ConstantValue::Integer(_, v4)) + ] if + matches!(v4, IntegerWithBase{ value: 4, .. }) && + matches!(plus_left.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(left_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v1)), + Expression::Call(times, CallKind::Infix, times_args) + ] if + matches!(v1, IntegerWithBase{ value: 1, .. }) && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(times_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v2)), + Expression::Value(ConstantValue::Integer(_, v3)) + ] if + matches!(v2, IntegerWithBase{ value: 2, .. }) && + matches!(v3, IntegerWithBase{ value: 3, .. }))))))))); +} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 15c5e2d..2d245f7 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -411,12 +411,18 @@ impl<'a> LexerState<'a> { }); } - let mut value = 0; + let mut value: u32 = 0; while let Some((idx, char)) = self.next_char() { if let Some(digit) = char.to_digit(16) { - value = (value * 16) + digit; - continue; + if let Some(shifted) = value.checked_mul(16) { + value = shifted + digit; + continue; + } else { + return Err(LexerError::InvalidUnicode { + span: token_start_offset..idx, + }); + } } if char == '}' { @@ -730,3 +736,17 @@ fn arrow_requires_nonop() { let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::Arrow), next_token()); } + +#[test] +fn unicode() { + let mut lexer = Lexer::from("'\\u{00BE}'"); + let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); + assert_eq!(Some(Token::Character('¾')), next_token()); + + 
let mut lexer = Lexer::from("'\\u{111111111111}'"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'\\u{00BE'"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'\\u00BE}'"); + assert!(lexer.next().unwrap().is_err()); +} -- 2.53.0 From 2ef9ae8bdc7026307e9cfa4b912220ad67e655fe Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Mon, 24 Nov 2025 18:31:44 -0800 Subject: [PATCH 21/33] Stuff and bother. --- Cargo.lock | 292 ++++++++------------ Cargo.toml | 5 + src/syntax.rs | 2 + src/syntax/arbitrary.rs | 548 +++++++++++++++++++++++++++++++++++++ src/syntax/ast.rs | 32 ++- src/syntax/location.rs | 34 ++- src/syntax/name.rs | 96 ++++++- src/syntax/parse.rs | 105 ++++--- src/syntax/parser_tests.rs | 323 +++++++++++++++++++++- src/syntax/print.rs | 70 +++++ src/syntax/tokens.rs | 132 ++++++++- 11 files changed, 1403 insertions(+), 236 deletions(-) create mode 100644 src/syntax/arbitrary.rs create mode 100644 src/syntax/print.rs diff --git a/Cargo.lock b/Cargo.lock index 6f6a4ea..352c0e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,15 +4,15 @@ version = 4 [[package]] name = "ahash" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", - "getrandom 0.2.16", + "getrandom", "once_cell", "version_check", - "zerocopy 0.7.35", + "zerocopy", ] [[package]] @@ -28,10 +28,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36f5e3dca4e09a6f340a61a0e9c7b61e030c69fc27bf29d73218f7e5e3b7638f" dependencies = [ "concolor", - "unicode-width", + "unicode-width 0.1.14", "yansi", ] +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + 
[[package]] name = "autocfg" version = "1.5.0" @@ -44,7 +50,9 @@ version = "0.1.0" dependencies = [ "ariadne", "internment", + "itertools", "memmap2", + "pretty", "proptest", "proptest-derive", "thiserror", @@ -73,15 +81,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "concolor" @@ -116,6 +124,12 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "equivalent" version = "1.0.2" @@ -152,25 +166,14 @@ checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "getrandom" -version = "0.2.16" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.1+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "wasip2", ] [[package]] @@ 
-210,26 +213,29 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.16" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "itertools" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] [[package]] name = "libc" -version = "0.2.176" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "linux-raw-sys" @@ -239,19 +245,18 @@ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] [[package]] name = "memmap2" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" dependencies = [ "libc", ] @@ -273,15 +278,15 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "parking_lot_core" -version 
= "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -290,28 +295,39 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.8.27", + "zerocopy", +] + +[[package]] +name = "pretty" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d22152487193190344590e4f30e219cf3fe140d9e7a3fdb683d82aa2c5f4156" +dependencies = [ + "arrayvec", + "termcolor", + "typed-arena", + "unicode-width 0.2.2", ] [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce" +checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.9.4", - "lazy_static", + "bitflags 2.10.0", "num-traits", "rand", "rand_chacha", @@ -341,9 +357,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = 
"a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -380,7 +396,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom", ] [[package]] @@ -394,18 +410,18 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.13" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "rustix" @@ -413,7 +429,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys", @@ -446,9 +462,9 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "syn" -version = "2.0.106" +version = "2.0.110" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" dependencies = [ "proc-macro2", "quote", @@ -462,12 +478,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + 
"getrandom", "once_cell", "rustix", "windows-sys 0.61.2", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "2.0.17" @@ -488,6 +513,12 @@ dependencies = [ "syn", ] +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "unarray" version = "0.1.4" @@ -496,9 +527,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-width" @@ -506,6 +537,12 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "version_check" version = "0.9.5" @@ -521,21 +558,6 @@ dependencies = [ "libc", ] -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ 
- "wasip2", -] - [[package]] name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -545,6 +567,15 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "windows-link" version = "0.2.1" @@ -557,16 +588,7 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets 0.42.2", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", + "windows-targets", ] [[package]] @@ -584,29 +606,13 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", 
+ "windows_x86_64_msvc", ] [[package]] @@ -615,90 +621,42 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - [[package]] name = "wit-bindgen" version = "0.46.0" @@ -711,33 +669,13 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" -[[package]] -name = "zerocopy" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" -dependencies = [ - "zerocopy-derive 0.7.35", -] - [[package]] name = "zerocopy" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ - "zerocopy-derive 0.8.27", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "zerocopy-derive", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 364d469..35c2c6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,12 @@ edition = "2024" [dependencies] ariadne = { version = "0.5.1", features = ["auto-color"] } internment = { version = "0.8.6", features = ["arc", "arena"] } +itertools = "0.14.0" memmap2 = "0.9.8" +pretty = { version = "0.12.5", features = ["termcolor"] } proptest = "1.7.0" proptest-derive = "0.6.0" thiserror = "2.0.12" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage)'] } diff --git a/src/syntax.rs b/src/syntax.rs index 08451b8..9edde17 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,3 +1,4 @@ +mod arbitrary; mod ast; mod error; mod location; @@ -5,6 +6,7 @@ mod name; mod parse; #[cfg(test)] mod parser_tests; +mod print; mod tokens; mod universe; diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs new file mode 100644 index 0000000..107346d --- /dev/null +++ b/src/syntax/arbitrary.rs @@ -0,0 +1,548 @@ +use std::fmt::Arguments; + +use crate::syntax::ast::{ConstantValue, IntegerWithBase, Type}; +use crate::syntax::location::Location; +use crate::syntax::name::Name; +use itertools::Itertools; +use proptest::arbitrary::Arbitrary; +use proptest::prelude::{BoxedStrategy, Rng}; +use proptest::prop_oneof; +use proptest::strategy::{NewTree, Strategy, ValueTree}; +use proptest::test_runner::TestRunner; + +const MAXIMUM_TYPE_DEPTH: usize = 5; +const MAXIMUM_TYPE_WIDTH: usize = 5; +const MAXIMUM_STRING_SIZE: usize = 32; +const PRIMITIVE_TYPES: &[&str] = &[ + "Char", "String", "I8", "I16", "I32", "I64", "U8", "U16", "U32", "U64", +]; + +#[derive(Debug, Default)] +pub struct TypeGenerationContext { + available_constructors: Vec, + available_variables: Vec, +} + +impl TypeGenerationContext { + fn 
generate_type(&mut self, runner: &mut TestRunner, depth: usize) -> Type { + let mut leaf_options = vec![]; + + if !self.available_constructors.is_empty() { + for name in self.available_constructors.iter() { + leaf_options.push(Type::Constructor( + Location::manufactured(), + name.clone(), + )); + } + } + + if !self.available_variables.is_empty() { + for name in self.available_variables.iter() { + leaf_options.push(Type::Variable( + Location::manufactured(), + name.clone(), + )); + } + } + + for prim in PRIMITIVE_TYPES.iter() { + leaf_options.push(Type::Primitive( + Location::manufactured(), + Name::new(Location::manufactured(), prim.to_string()), + )); + } + + if depth < MAXIMUM_TYPE_DEPTH && runner.rng().random_bool(0.5) { + } + + let index = runner.rng().random_range(0..leaf_options.len()); + leaf_options.remove(index) + } +} + +#[derive(Clone)] +pub struct TypeGenerationTree { + current_value: Type, + parent: Option>, + untried_simplified_items: Option>, +} + +impl TypeGenerationTree { + /// Create a new type generation tree based on the given + /// initial value. + pub fn new(initial_value: Type) -> TypeGenerationTree { + TypeGenerationTree { + current_value: initial_value, + parent: None, + untried_simplified_items: None, + } + } +} + +fn generate_powerset(_: &[Type]) -> Vec> { + vec![] +} + +fn simplify_type(incoming: &Type) -> Vec { + match incoming { + Type::Primitive(_, _) => vec![], + Type::Constructor(_, _) => vec![], + Type::Variable(_, _) => vec![], + Type::Function(arg_types, ret_type) => { + let simplified_return_types = simplify_type(ret_type.as_ref()); + + // we do the following as a set of steps, choosing to go deep rather than + // broad immediately. So this works as follows: + // + // 1. If there are simplifications for the return type, then just + // return variations with the simplified return type. + // 2. If there are simplifications for the first argument, then + // just return variations with the first argument simplified. + // 3. 
Repeat for each of the arguments. + // 4. At this point, all the subtypes are as simple as they can + // be, so return a series of function types with fewer arguments. + // 5. If we are a function with no arguments, then just return + // the return type. + if !simplified_return_types.is_empty() { + return simplified_return_types + .into_iter() + .map(|ret| Type::Function(arg_types.clone(), Box::new(ret))) + .collect(); + } + + // now check the arguments, and see if we can simplify them in a + // better way. + for idx in 0..arg_types.len() { + let simplified_arguments = simplify_type(&arg_types[idx]); + + if simplified_arguments.is_empty() { + continue; + } + + let mut new_function_types = vec![]; + + for simplified_arg in simplified_arguments.into_iter() { + let mut new_args = vec![]; + + for item in &arg_types[0..idx] { + new_args.push(item.clone()); + } + new_args.push(simplified_arg); + for item in &arg_types[idx + 1..arg_types.len()] { + new_args.push(item.clone()); + } + + new_function_types.push(Type::Function(new_args, ret_type.clone())); + } + + if !new_function_types.is_empty() { + return new_function_types; + } + } + + // ok, all of the arguments and the return type are already as + // simple as they can be, so let's see if we can reduce the number + // of arguments. + let mut new_types = vec![]; + for args in arg_types.iter().powerset() { + if args.len() != arg_types.len() { + new_types.push(Type::Function( + args.into_iter().cloned().collect(), + ret_type.clone(), + )); + } + } + + if new_types.is_empty() { + vec![ret_type.as_ref().clone()] + } else { + new_types + } + } + + Type::Application(constructor_type, arg_types) => { + // much like functions, we're going to try to simplify the constructor, + // then we'll try to simplify the arguments, then we'll try to remove + // arguments. 
+ let simplified_constructor = simplify_type(constructor_type.as_ref()); + + if !simplified_constructor.is_empty() { + return simplified_constructor + .into_iter() + .map(|c| Type::Application(Box::new(c), arg_types.clone())) + .collect(); + } + + // now check the arguments, and see if we can simplify them in a + // better way. + for idx in 0..arg_types.len() { + let simplified_arguments = simplify_type(&arg_types[idx]); + + if simplified_arguments.is_empty() { + continue; + } + + let mut new_appl_types = vec![]; + + for simplified_arg in simplified_arguments.into_iter() { + let mut new_args = vec![]; + + for item in &arg_types[0..idx] { + new_args.push(item.clone()); + } + new_args.push(simplified_arg); + for item in &arg_types[idx + 1..arg_types.len()] { + new_args.push(item.clone()); + } + + new_appl_types.push(Type::Application(constructor_type.clone(), new_args)); + } + + if !new_appl_types.is_empty() { + return new_appl_types; + } + } + + // and now we'll try to reduce types. + let mut new_types = vec![]; + for args in arg_types.iter().powerset() { + if args.len() != arg_types.len() { + new_types.push(Type::Application( + constructor_type.clone(), + args.into_iter().cloned().collect(), + )); + } + } + + if new_types.is_empty() { + vec![constructor_type.as_ref().clone()] + } else { + new_types + } + } + } +} + +impl ValueTree for TypeGenerationTree { + type Value = Type; + + fn current(&self) -> Self::Value { + self.current_value.clone() + } + + fn simplify(&mut self) -> bool { + match self.untried_simplified_items.as_mut() { + None => { + let mut simplified = simplify_type(&self.current_value) + .into_iter() + .map(|current_value| TypeGenerationTree { + current_value, + parent: Some(Box::new(self.clone())), + + untried_simplified_items: None, + }) + .collect::>(); + + match simplified.pop() { + None => { + self.untried_simplified_items = Some(simplified); + false + } + + Some(next_tree) => { + self.untried_simplified_items = Some(simplified); + *self = 
next_tree; + true + } + } + } + + Some(untried_simplifieds) => match untried_simplifieds.pop() { + None => false, + Some(x) => { + *self = x; + true + } + }, + } + } + + fn complicate(&mut self) -> bool { + match self.parent.take() { + None => false, + Some(x) => { + *self = *x; + true + } + } + } +} + +impl Strategy for TypeGenerationContext { + type Tree = TypeGenerationTree; + type Value = Type; + + fn new_tree(&self, _runner: &mut TestRunner) -> NewTree { + unimplemented!() + } +} + +impl Arbitrary for Type { + type Parameters = TypeGenerationContext; + type Strategy = TypeGenerationContext; + + fn arbitrary_with(_context: Self::Parameters) -> Self::Strategy { + unimplemented!() + } +} + +#[derive(Default)] +pub enum LegalConstantType { + #[default] + Any, + String, + Char, + Number, +} + +impl Arbitrary for ConstantValue { + type Parameters = LegalConstantType; + type Strategy = BoxedStrategy; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + match args { + LegalConstantType::Char => char::arbitrary() + .prop_map(|x| ConstantValue::Character(Location::manufactured(), x)) + .boxed(), + + LegalConstantType::String => { + proptest::collection::vec(proptest::char::any(), MAXIMUM_STRING_SIZE) + .prop_map(|x| { + ConstantValue::String(Location::manufactured(), String::from_iter(x)) + }) + .boxed() + } + + LegalConstantType::Number => { + let value_strat = u64::arbitrary(); + let base_strat = proptest::prop_oneof![ + proptest::strategy::Just(None), + proptest::strategy::Just(Some(2)), + proptest::strategy::Just(Some(8)), + proptest::strategy::Just(Some(10)), + proptest::strategy::Just(Some(16)), + ]; + + (value_strat, base_strat) + .prop_map(|(value, base)| { + ConstantValue::Integer( + Location::manufactured(), + IntegerWithBase { base, value }, + ) + }) + .boxed() + } + + LegalConstantType::Any => proptest::prop_oneof![ + Self::arbitrary_with(LegalConstantType::Char), + Self::arbitrary_with(LegalConstantType::String), + 
Self::arbitrary_with(LegalConstantType::Number), + ] + .boxed(), + } + } +} + +#[cfg(test)] +mod simplifiers { + use super::*; + + #[test] + fn types() { + let loc = Location::manufactured(); + let foo = Name::new(loc.clone(), "Foo"); + let primint = Type::Primitive(loc.clone(), Name::new(loc.clone(), "Int")); + let primchar = Type::Primitive(loc.clone(), Name::new(loc.clone(), "Char")); + let primstr = Type::Primitive(loc.clone(), Name::new(loc.clone(), "String")); + + assert_eq!( + simplify_type(&Type::Constructor(loc.clone(), foo.clone())), + vec![] + ); + assert_eq!( + simplify_type(&Type::Variable(loc.clone(), foo.clone())), + vec![] + ); + assert_eq!( + simplify_type(&Type::Primitive(loc.clone(), foo.clone())), + vec![] + ); + + assert_eq!( + simplify_type(&Type::Function(vec![], Box::new(primint.clone()))), + vec![primint.clone()] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone()], + Box::new(primint.clone()) + )), + vec![ + Type::Function(vec![], Box::new(primint.clone())), + Type::Function(vec![primint.clone()], Box::new(primint.clone())), + Type::Function(vec![primchar.clone()], Box::new(primint.clone())), + ] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(primint.clone()) + )), + vec![ + Type::Function(vec![], Box::new(primint.clone())), + Type::Function(vec![primint.clone()], Box::new(primint.clone())), + Type::Function(vec![primchar.clone()], Box::new(primint.clone())), + Type::Function(vec![primstr.clone()], Box::new(primint.clone())), + Type::Function( + vec![primint.clone(), primchar.clone()], + Box::new(primint.clone()) + ), + Type::Function( + vec![primint.clone(), primstr.clone()], + Box::new(primint.clone()) + ), + Type::Function( + vec![primchar.clone(), primstr.clone()], + Box::new(primint.clone()) + ), + ] + ); + + assert_eq!( + simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + 
Box::new(Type::Function(vec![], Box::new(primint.clone()))), + )), + vec![Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(primint.clone()) + ),] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function( + vec![primint.clone(), primchar.clone()], + Box::new(primint.clone()) + )), + )), + vec![ + Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function(vec![], Box::new(primint.clone()))) + ), + Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function( + vec![primint.clone()], + Box::new(primint.clone()) + )) + ), + Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function( + vec![primchar.clone()], + Box::new(primint.clone()) + )) + ), + ] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![ + Type::Function(vec![], Box::new(primint.clone())), + primstr.clone() + ], + Box::new(primint.clone()) + )), + vec![Type::Function( + vec![primint.clone(), primstr.clone()], + Box::new(primint.clone()) + )] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![ + primint.clone(), + Type::Function(vec![], Box::new(primint.clone())) + ], + Box::new(primint.clone()) + )), + vec![Type::Function( + vec![primint.clone(), primint.clone()], + Box::new(primint.clone()) + )] + ); + + let applied = Type::Application(Box::new(primint.clone()), vec![]); + assert_eq!( + simplify_type(&Type::Application(Box::new(primint.clone()), vec![])), + vec![primint.clone()] + ); + assert_eq!(simplify_type(&applied), vec![primint.clone()]); + assert_eq!( + simplify_type(&Type::Application( + Box::new(applied.clone()), + vec![primint.clone()] + )), + vec![Type::Application( + Box::new(primint.clone()), + vec![primint.clone()] + )] + ); + assert_eq!( + simplify_type(&Type::Application( + Box::new(primint.clone()), + vec![applied.clone()] + )), + 
vec![Type::Application( + Box::new(primint.clone()), + vec![primint.clone()] + )] + ); + assert_eq!( + simplify_type(&Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), applied.clone(), primstr.clone()] + )), + vec![Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primint.clone(), primstr.clone()] + )] + ); + assert_eq!( + simplify_type(&Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primint.clone(), primstr.clone()] + )), + vec![ + Type::Application(Box::new(primint.clone()), vec![]), + Type::Application(Box::new(primint.clone()), vec![primchar.clone()]), + Type::Application(Box::new(primint.clone()), vec![primint.clone()]), + Type::Application(Box::new(primint.clone()), vec![primstr.clone()]), + Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primint.clone()] + ), + Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primstr.clone()] + ), + Type::Application( + Box::new(primint.clone()), + vec![primint.clone(), primstr.clone()] + ) + ] + ); + } +} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index fc78da6..64837b9 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -244,7 +244,7 @@ pub struct TypeRestriction { pub arguments: Vec, } -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum Type { Constructor(Location, Name), Variable(Location, Name), @@ -253,6 +253,22 @@ pub enum Type { Function(Vec, Box), } +impl PartialEq for Type { + fn eq(&self, other: &Self) -> bool { + match self { + Type::Constructor(_, x) => matches!(other, Type::Constructor(_, y) if x == y), + Type::Variable(_, x) => matches!(other, Type::Variable(_, y) if x == y), + Type::Primitive(_, x) => matches!(other, Type::Primitive(_, y) if x == y), + Type::Application(con1, args1) => { + matches!(other, Type::Application(con2, args2) if con1 == con2 && args1 == args2) + } + Type::Function(args1, ret1) => { + matches!(other, Type::Function(args2, ret2) if args1 == args2 && 
ret1 == ret2) + } + } + } +} + impl Located for Type { fn location(&self) -> Location { match self { @@ -277,7 +293,7 @@ impl Located for Type { } } -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum ConstantValue { Integer(Location, IntegerWithBase), Character(Location, char), @@ -294,6 +310,18 @@ impl Located for ConstantValue { } } +impl PartialEq for ConstantValue { + fn eq(&self, other: &Self) -> bool { + match self { + ConstantValue::Character(_, x) => { + matches!(other, ConstantValue::Character(_, y) if x == y) + } + ConstantValue::String(_, x) => matches!(other, ConstantValue::String(_, y) if x == y), + ConstantValue::Integer(_, x) => matches!(other, ConstantValue::Integer(_, y) if x == y), + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] pub struct IntegerWithBase { #[proptest(strategy = "proptest::prop_oneof![ \ diff --git a/src/syntax/location.rs b/src/syntax/location.rs index cd8bc78..2bc59f0 100644 --- a/src/syntax/location.rs +++ b/src/syntax/location.rs @@ -38,6 +38,13 @@ impl Location { } } + pub fn manufactured() -> Self { + Location { + file: ArcIntern::new("".into()), + span: 0..0, + } + } + pub fn extend_to(&self, other: &Location) -> Location { assert_eq!(self.file, other.file); Location { @@ -50,12 +57,23 @@ impl Location { self.span = min(self.span.start, span.start)..max(self.span.end, span.end); self } - - pub fn file(&self) -> &str { - self.file.to_str().unwrap_or("") - } - - pub fn span(&self) -> Range { - self.span.clone() - } +} + +#[test] +fn extension_and_merge() { + let file = ArcIntern::new("/foo/bar.txt".into()); + let loc1 = Location::new(&file, 1..4); + let loc2 = Location::new(&file, 4..8); + + assert_eq!(loc1.extend_to(&loc2).source(), &file); + assert_eq!(loc1.extend_to(&loc2).start(), 1); + assert_eq!(loc1.extend_to(&loc2).end(), 8); + + let loc3 = Location::new(&file, 12..16); + assert_eq!(loc1.extend_to(&loc3).source(), &file); + assert_eq!(loc1.extend_to(&loc3).start(), 1); + 
assert_eq!(loc1.extend_to(&loc3).end(), 16); + + assert_eq!(loc1.clone().merge_span(0..1).start(), 0); + assert_eq!(loc1.merge_span(0..1).end(), 4); } diff --git a/src/syntax/name.rs b/src/syntax/name.rs index 7b48d51..ad7ef8c 100644 --- a/src/syntax/name.rs +++ b/src/syntax/name.rs @@ -1,12 +1,14 @@ use crate::syntax::Location; +#[cfg(test)] +use internment::ArcIntern; use std::cmp; use std::fmt; -use std::hash; +use std::hash::{Hash, Hasher}; use std::sync::atomic::{AtomicU64, Ordering}; static IDENTIFIER_COUNTER: AtomicU64 = AtomicU64::new(0); -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Name { printable: String, identifier: u64, @@ -21,8 +23,8 @@ impl cmp::PartialEq for Name { impl cmp::Eq for Name {} -impl hash::Hash for Name { - fn hash(&self, state: &mut H) { +impl Hash for Name { + fn hash(&self, state: &mut H) { self.identifier.hash(state); } } @@ -66,3 +68,89 @@ impl Name { self.location.as_ref() } } + +#[test] +fn equality() { + let file = ArcIntern::new("/foo.bang".into()); + let loc1 = Location::new(&file, 0..3); + let loc2 = Location::new(&file, 9..12); + + assert_ne!(Name::gensym("x"), Name::gensym("x")); + assert_ne!(Name::new(loc1.clone(), "x"), Name::new(loc1.clone(), "x")); + assert_eq!( + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc1.clone()) + }, + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc2.clone()) + } + ); + assert_eq!( + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc1.clone()) + }, + Name { + printable: "x".into(), + identifier: 5, + location: None + } + ); + assert_eq!( + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc1.clone()) + }, + Name { + printable: "y".into(), + identifier: 5, + location: None + } + ); +} + +#[test] +fn hashing() { + let file = ArcIntern::new("/foo.bang".into()); + let loc1 = Location::new(&file, 0..3); + let loc2 = Location::new(&file, 9..12); + + let x1 = Name { + printable: "x".into(), + identifier: 1, + 
location: Some(loc1), + }; + let mut x2 = Name { + printable: "x".into(), + identifier: 2, + location: Some(loc2), + }; + let y1 = Name { + printable: "y".into(), + identifier: 1, + location: None, + }; + + let run_hash = |name: &Name| { + let mut hash = std::hash::DefaultHasher::new(); + name.hash(&mut hash); + hash.finish() + }; + + let hash_x1 = run_hash(&x1); + let hash_x2 = run_hash(&x2); + let hash_y1 = run_hash(&y1); + + assert_ne!(hash_x1, hash_x2); + assert_eq!(hash_x1, hash_y1); + + x2.bind_to(&x1); + let rehashed_x2 = run_hash(&x2); + assert_eq!(hash_x1, rehashed_x2); +} diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs index ae49b4a..5025c5e 100644 --- a/src/syntax/parse.rs +++ b/src/syntax/parse.rs @@ -261,9 +261,13 @@ impl<'lexer> Parser<'lexer> { } #[allow(unused)] + #[cfg(not(coverage))] fn print_next_token(&mut self, comment: &str) { let token = self.next().expect("can get token"); - println!("[{comment}] next token will be {:?}", token.as_ref().map(|x| x.token.clone())); + println!( + "[{comment}] next token will be {:?}", + token.as_ref().map(|x| x.token.clone()) + ); if let Some(token) = token { self.save(token); } @@ -379,14 +383,7 @@ impl<'lexer> Parser<'lexer> { arguments, }; - let Some(maybe_comma) = self.next()? else { - return Ok(Some(restriction)); - }; - - match maybe_comma.token { - Token::Comma => {} - _ => self.save(maybe_comma), - } + let _ = self.require_token(Token::Comma, ""); Ok(Some(restriction)) } @@ -402,30 +399,37 @@ impl<'lexer> Parser<'lexer> { let next = self .next()? 
.ok_or_else(|| self.bad_eof("looking for definition body"))?; - self.save(next.clone()); - if let Ok(structure) = self.parse_structure() { - return Ok(Def::Structure(structure)); + match next.token { + Token::ValueName(ref x) if x == "structure" => { + self.save(next); + Ok(Def::Structure(self.parse_structure()?)) + } + + Token::ValueName(ref x) if x == "enumeration" => { + self.save(next); + Ok(Def::Enumeration(self.parse_enumeration()?)) + } + + Token::ValueName(ref x) + if x == "operator" || x == "prefix" || x == "infix" || x == "postfix" => + { + self.save(next); + Ok(Def::Operator(self.parse_operator()?)) + } + + Token::ValueName(_) => { + self.save(next); + self.parse_function_or_value() + } + + _ => Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "'structure', 'enumeration', 'operator', or a value identifier".into(), + }), } - - if let Ok(enumeration) = self.parse_enumeration() { - return Ok(Def::Enumeration(enumeration)); - } - - if let Ok(operator) = self.parse_operator() { - return Ok(Def::Operator(operator)); - } - - if let Ok(fun_or_val) = self.parse_function_or_value() { - return Ok(fun_or_val); - } - - Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: next.span, - token: next.token, - expected: "'structure', 'enumeration', or a value identifier".into(), - }) } /// Parse a structure definition. 
@@ -821,6 +825,7 @@ impl<'lexer> Parser<'lexer> { fn parse_function_def_arguments(&mut self) -> Result, ParserError> { let _ = self.require_token(Token::OpenParen, "start of function argument definition")?; let mut result = vec![]; + let mut just_skipped_comma = false; loop { let next = self @@ -831,7 +836,22 @@ impl<'lexer> Parser<'lexer> { break; } + if matches!(next.token, Token::Comma) { + if just_skipped_comma { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "after another comma in function arguments".into(), + }); + } + + just_skipped_comma = true; + continue; + } + self.save(next); + just_skipped_comma = false; let name = self.parse_name("function argument name")?; let mut arg_type = None; @@ -992,6 +1012,7 @@ impl<'lexer> Parser<'lexer> { Some(Box::new(sub_pattern)) } else { + self.save(maybe_paren); None } } else { @@ -1410,9 +1431,12 @@ impl<'lexer> Parser<'lexer> { Token::TypeName(n) | Token::PrimitiveTypeName(n) => { let type_name = Name::new(self.to_location(next.span.clone()), n); - let after_type_name = self.next()?.ok_or_else(|| { - self.bad_eof("looking for colon, open brace, or open paren in constructor") - })?; + let Some(after_type_name) = self.next()? 
else { + return Ok(Expression::Reference( + type_name.location().unwrap().clone(), + type_name, + )); + }; match after_type_name.token { Token::OpenBrace => { @@ -1480,12 +1504,13 @@ impl<'lexer> Parser<'lexer> { Ok(Expression::Enumeration(ev)) } - _ => Err(ParserError::UnexpectedToken { - file: self.file.clone(), - span: after_type_name.span, - token: after_type_name.token, - expected: "colon, open brace, or open paren in constructor".into(), - }), + _ => { + self.save(after_type_name); + Ok(Expression::Reference( + type_name.location().unwrap().clone(), + type_name, + )) + } } } diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs index 2219d7e..0a214a4 100644 --- a/src/syntax/parser_tests.rs +++ b/src/syntax/parser_tests.rs @@ -114,6 +114,9 @@ fn types() { if b1.as_printed() == "a" && b2.as_printed() == "b")) && matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") )); + assert!(parse_type("Cons a b ->").is_err()); + assert!(parse_type("(Cons a b) (Cons a b)").is_err()); + assert!(parse_type("(Cons a b) (Cons a b) :").is_err()); } #[test] @@ -129,6 +132,17 @@ fn type_restrictions() { Ok(TypeRestrictions{ restrictions }) if restrictions.is_empty() )); + assert!(matches!( + parse_tr("restrict(prim%Cons a b)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 1 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Primitive(_, x) if x.as_printed() == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); + assert!(matches!( parse_tr("restrict(Cons a b)"), Ok(TypeRestrictions { restrictions }) if restrictions.len() == 1 && @@ -151,6 +165,8 @@ fn type_restrictions() { matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); + 
assert!(parse_tr("restrict(cons a b,)").is_err()); + assert!(parse_tr("restrict(,Cons a b,)").is_err()); assert!(matches!( @@ -216,6 +232,9 @@ fn field_definition() { if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Word8") && args.is_empty()))); + assert!(parse_fd("foo :: Word8,").is_err()); + assert!(parse_fd("foo: Word8;").is_err()); + assert!(matches!( parse_fd("foo: Cons a b,"), Ok(Some(StructureField{ name, field_type, .. })) @@ -400,6 +419,8 @@ fn enumerations() { EnumerationVariant { name: name2, argument: arg2, ..}, ] if name1.as_printed() == "A" && arg1.is_none() && name2.as_printed() == "B" && arg2.is_none()))); + assert!(parse_en("enumeration Alternates { A").is_err()); + assert!(parse_en("enumeration Alternates { A; B }").is_err()); assert!(matches!( parse_en("enumeration Alternates { A, B, }"), Ok(EnumerationDef { name, variants, .. }) @@ -423,6 +444,9 @@ fn expressions() { assert!(matches!( parse_ex("x"), Ok(Expression::Reference(_,n)) if n.as_printed() == "x")); + assert!(matches!( + parse_ex("X"), + Ok(Expression::Reference(_,n)) if n.as_printed() == "X")); assert!(matches!( parse_ex("(x)"), Ok(Expression::Reference(_,n)) if n.as_printed() == "x")); @@ -453,6 +477,7 @@ fn enumeration_values() { }; assert!(parse_ex("Hello::world").is_err()); + assert!(parse_ex("Hello::world(a,b)").is_err()); assert!(matches!( parse_ex("Hello::World"), Ok(Expression::Enumeration(ev)) @@ -465,6 +490,14 @@ fn enumeration_values() { if ev.type_name.as_printed() == "Hello" && ev.variant_name.as_printed() == "World" && ev.argument.is_some())); + assert!(matches!( + parse_ex("Hello::World + 1"), + Ok(Expression::Call(plus, CallKind::Infix, args)) if + matches!(plus.as_ref(), Expression::Reference(_, n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Enumeration(_), + Expression::Value(_) + ]))); } #[test] @@ -475,6 +508,7 @@ fn structure_value() { result.parse_expression() }; + assert!(parse_st("Foo{").is_err()); 
assert!(parse_st("Foo{ , }").is_err()); assert!(parse_st("Foo{ foo, }").is_err()); assert!(parse_st("Foo{ foo: , }").is_err()); @@ -635,6 +669,11 @@ fn calls() { Ok(Expression::Call(f, CallKind::Normal, args)) if matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && args.is_empty())); + assert!(parse_ex("f(").is_err()); + assert!(parse_ex("f(a").is_err()); + assert!(parse_ex("f(a,b").is_err()); + assert!(parse_ex("f(a,b,").is_err()); + assert!(parse_ex("f(a,b ::").is_err()); assert!(matches!( parse_ex("f(a)"), Ok(Expression::Call(f, CallKind::Normal, args)) if @@ -656,6 +695,14 @@ fn calls() { Expression::Reference(_,a), Expression::Reference(_,b), ] if a.as_printed() == "a" && b.as_printed() == "b"))); + assert!(matches!( + parse_ex("f(A,b,)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Reference(_,a), + Expression::Reference(_,b), + ] if a.as_printed() == "A" && b.as_printed() == "b"))); assert!(parse_ex("f(,a,b,)").is_err()); assert!(parse_ex("f(a,,b,)").is_err()); assert!(parse_ex("f(a,b,,)").is_err()); @@ -833,6 +880,9 @@ fn prefix_and_postfix() { parse_ex("a * ++ b"), Err(ParserError::UnexpectedToken{ token: Token::OperatorName(pp), .. }) if pp == "++")); + + // this is a little bit of a weird case. + assert!(parse_ex("**").is_err()); } #[test] @@ -1083,6 +1133,154 @@ fn definitions() { body.len() == 1))); } +#[test] +fn functions() { + let parse_def = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_definition() + }; + + assert!(matches!( + parse_def("function() { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. 
}) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_none() && + body.len() == 1))); + assert!(matches!( + parse_def("fun(a) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [FunctionArg{ name, arg_type: None }] if + name.as_printed() == "a")))); + assert!(matches!( + parse_def("fun(a,b) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: None }, + FunctionArg{ name: bname, arg_type: None } + ] if + aname.as_printed() == "a" && + bname.as_printed() == "b")))); + assert!(matches!( + parse_def("fun(a,b,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: None }, + FunctionArg{ name: bname, arg_type: None } + ] if + aname.as_printed() == "a" && + bname.as_printed() == "b")))); + assert!(matches!( + parse_def("fun(a:U8,b,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. 
}) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: Some(Type::Application(atype, atype_args)) }, + FunctionArg{ name: bname, arg_type: None } + ] if + aname.as_printed() == "a" && + matches!(atype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + atype_args.is_empty() && + bname.as_printed() == "b")))); + assert!(matches!( + parse_def("fun(a:U8,b:U8,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: Some(Type::Application(atype, atype_args)) }, + FunctionArg{ name: bname, arg_type: Some(Type::Application(btype, btype_args)) } + ] if + aname.as_printed() == "a" && + matches!(atype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + atype_args.is_empty() && + bname.as_printed() == "b" && + matches!(btype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + btype_args.is_empty())))); + assert!(matches!( + parse_def("fun(a,b:U8,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. 
}) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: None }, + FunctionArg{ name: bname, arg_type: Some(Type::Application(btype, btype_args)) } + ] if + aname.as_printed() == "a" && + bname.as_printed() == "b" && + matches!(btype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + btype_args.is_empty())))); + assert!(parse_def("fun(a,,b,) { }").is_err()); +} + +#[test] +fn definition_types() { + let parse_def = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_definition() + }; + + assert!(matches!( + parse_def("x: prim%U8 = 1;"), + Ok(Definition { definition, .. }) if + matches!(&definition, Def::Value(ValueDef{ mtype, .. }) if + matches!(mtype, Some(Type::Application(f, args)) if + args.is_empty() && + matches!(f.as_ref(), Type::Primitive(_, name) if + name.as_printed() == "U8"))))); + assert!(matches!( + parse_def("x: Stupid Monad prim%U8 = 1;"), + Ok(Definition { definition, .. }) if + matches!(&definition, Def::Value(ValueDef{ mtype, .. }) if + matches!(mtype, Some(Type::Application(f, args)) if + matches!(f.as_ref(), Type::Constructor(_, name) if + name.as_printed() == "Stupid") && + matches!(args.as_slice(), [Type::Constructor(_, cname), Type::Primitive(_, pname)] if + cname.as_printed() == "Monad" && + pname.as_printed() == "U8"))))); + assert!(matches!( + parse_def("x: Stupid (Monad prim%U8) = 1;"), + Ok(Definition { definition, .. }) if + matches!(&definition, Def::Value(ValueDef{ mtype, .. 
}) if + matches!(mtype, Some(Type::Application(f, args)) if + matches!(f.as_ref(), Type::Constructor(_, name) if + name.as_printed() == "Stupid") && + matches!(args.as_slice(), [Type::Application(cname, args2)] if + matches!(cname.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Monad") && + matches!(args2.as_slice(), [Type::Primitive(_, pname)] if + pname.as_printed() == "U8")))))); + assert!(parse_def("x: Stupid (Monad prim%U8 = 1;").is_err()); +} + #[test] fn operators() { let parse = |str| { @@ -1108,16 +1306,21 @@ prefix operator $$ at 1 -> money; assert!(parse("right infix operator - -> negate;").is_err()); assert!(parse("left infix operator - -> negate;").is_err()); assert!(parse("infix operator at 8 - -> negate;").is_err()); - + assert!(parse("infix operator * at 16 -> multiply;").is_err()); + assert!(parse("infix operator * at apple -> multiply;").is_err()); // these are designed to replicate the examples in the infix_and_precedence // tests, but with the precedence set automatically by the parser. - let plus_and_times = |expr| format!(r#" + let plus_and_times = |expr| { + format!( + r#" infix left operator + at 6 -> add; infix right operator * at 7 -> mul; x = {expr}; -"#); +"# + ) + }; let plus_example = plus_and_times("1 + 2 + 3"); assert!(matches!( @@ -1155,7 +1358,6 @@ x = {expr}; Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) ] if *v2 == 2 && *v3 == 3))))))); - let mixed_example = plus_and_times("1 + 2 * 3 + 4"); assert!(matches!( parse(&mixed_example), @@ -1183,3 +1385,116 @@ x = {expr}; matches!(v2, IntegerWithBase{ value: 2, .. }) && matches!(v3, IntegerWithBase{ value: 3, .. }))))))))); } + +#[test] +fn pattern_match() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("match x { }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. 
})) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + cases.is_empty())); + assert!(matches!( + parse_ex("match x { 1 -> 2 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase { pattern, consequent }] if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 1) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)))); + assert!(matches!( + parse_ex("match x { 1 -> 2, }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase { pattern, consequent }] if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 1) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)))); + assert!(matches!( + parse_ex("match x { 1 -> 2, 3 -> 4 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [mcase1, mcase2] if + matches!(mcase1, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 1) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)) && + matches!(mcase2, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 3) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 4))))); + assert!(matches!( + parse_ex("match x { y -> 2, }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. 
})) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase { pattern, consequent }] if + matches!(pattern, Pattern::Variable(n) if n.as_printed() == "y") && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)))); + assert!(matches!( + parse_ex("match x { Option::None -> 2, Option::Some(x) -> 4 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [mcase1, mcase2] if + matches!(mcase1, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::EnumerationValue(ep) if + matches!(ep, EnumerationPattern{ type_name, variant_name, argument: None, .. } if + type_name.as_printed() == "Option" && + variant_name.as_printed() == "None")) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)) && + matches!(mcase2, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::EnumerationValue(ep) if + matches!(ep, EnumerationPattern{ type_name, variant_name, argument: Some(subp), .. } if + type_name.as_printed() == "Option" && + variant_name.as_printed() == "Some" && + matches!(subp.as_ref(), Pattern::Variable(n) if n.as_printed() == "x"))) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 4))))); + assert!(matches!( + parse_ex("match x { Foo{ a, b: 2, c: d } -> 6 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase{ pattern, consequent }] if + matches!(pattern, Pattern::Structure(StructurePattern{ type_name, fields, .. 
}) if + type_name.as_printed() == "Foo" && + matches!(fields.as_slice(), [(field1, None), (field2, Some(pat2)), (field3, Some(pat3))] if + field1.as_printed() == "a" && + field2.as_printed() == "b" && + field3.as_printed() == "c" && + matches!(pat2, Pattern::Constant(ConstantValue::Integer(_, iwb)) if iwb.value == 2) && + matches!(pat3, Pattern::Variable(n) if n.as_printed() == "d"))) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 6)))); + assert!(matches!( + parse_ex("match x { Foo{ a, b: 2, c } -> 6 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase{ pattern, consequent }] if + matches!(pattern, Pattern::Structure(StructurePattern{ type_name, fields, .. }) if + type_name.as_printed() == "Foo" && + matches!(fields.as_slice(), [(field1, None), (field2, Some(pat2)), (field3, None)] if + field1.as_printed() == "a" && + field2.as_printed() == "b" && + field3.as_printed() == "c" && + matches!(pat2, Pattern::Constant(ConstantValue::Integer(_, iwb)) if iwb.value == 2))) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 6)))); + + assert!(parse_ex("match x { Foo -> 3 }").is_err()); + assert!(parse_ex("match x { (4) -> 3 }").is_err()); + assert!(parse_ex("match x { Foo{ 3, x } -> 3 }").is_err()); + assert!(parse_ex("match x { Foo{ x").is_err()); + assert!(parse_ex("match x { Foo{ x: 3").is_err()); + assert!(parse_ex("match x { Foo{ x:: 3").is_err()); + assert!(parse_ex("match x { Foo{ x: 3 4 } -> 4 }").is_err()); +} diff --git a/src/syntax/print.rs b/src/syntax/print.rs new file mode 100644 index 0000000..71a0242 --- /dev/null +++ b/src/syntax/print.rs @@ -0,0 +1,70 @@ +use crate::syntax::ast::{ConstantValue, Type}; +#[cfg(test)] +use crate::syntax::parse::Parser; +#[cfg(test)] +use crate::syntax::tokens::Lexer; +use pretty::{DocAllocator, Pretty}; + 
+impl<'a, D: ?Sized + DocAllocator<'a, A>, A: 'a> Pretty<'a, D, A> for Type { + fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { + match self { + Type::Constructor(_, n) => allocator.as_string(n), + Type::Variable(_, n) => allocator.as_string(n), + Type::Primitive(_, n) => allocator.text("prim%").append(allocator.as_string(n)), + + Type::Application(c, args) => c + .pretty(allocator) + .append(allocator.space()) + .append(allocator.intersperse(args, " ")), + + Type::Function(args, ret) => allocator + .intersperse(args, " ") + .append(allocator.space()) + .append(ret.pretty(allocator)), + } + } +} + +impl<'a, D: ?Sized + DocAllocator<'a, A>, A: 'a> Pretty<'a, D, A> for ConstantValue { + fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> { + match self { + ConstantValue::String(_, x) => allocator.text(format!("{x:?}")), + ConstantValue::Character(_, c) => allocator.text(format!("{c:?}")), + ConstantValue::Integer(_, iwb) => match iwb.base { + None => allocator.as_string(iwb.value), + Some(2) => allocator.text(format!("0b{:b}", iwb.value)), + Some(8) => allocator.text(format!("0o{:o}", iwb.value)), + Some(10) => allocator.text(format!("0d{}", iwb.value)), + Some(16) => allocator.text(format!("0x{:x}", iwb.value)), + Some(x) => panic!("Illegal base {x} for integer constant."), + }, + } + } +} + +proptest::proptest! 
{ + #[test] + fn constants(x: ConstantValue) { + let allocator: pretty::Arena = pretty::Arena::new(); + let docbuilder = x.clone().pretty(&allocator); + let mut string_version = String::new(); + docbuilder.render_fmt(80, &mut string_version).expect("can render to string"); + let lexer = Lexer::from(string_version.as_str()); + let mut parser = Parser::new("test", lexer); + let roundtripped = parser.parse_constant().expect("can parse constant"); + proptest::prop_assert_eq!(x, roundtripped); + } + +// #[test] +// fn types(x: Type) { +// let allocator: pretty::Arena = pretty::Arena::new(); +// let docbuilder = x.clone().pretty(&allocator); +// let mut string_version = String::new(); +// docbuilder.render_fmt(80, &mut string_version).expect("can render to string"); +// println!("String version: {string_version:?}"); +// let lexer = Lexer::from(string_version.as_str()); +// let mut parser = Parser::new("test", lexer); +// let roundtripped = parser.parse_type().expect("can parse constant"); +// proptest::prop_assert_eq!(x, roundtripped); +// } +} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 2d245f7..ad096c3 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -642,6 +642,13 @@ fn numbers_work_as_expected() { }), parsed_single_token("0o10") ); + assert_eq!( + Token::Integer(IntegerWithBase { + base: None, + value: 10 + }), + parsed_single_token("0010") + ); assert_eq!( Token::Integer(IntegerWithBase { base: Some(10), @@ -681,6 +688,20 @@ fn values_work_as_expected() { assert_eq!(Token::ValueName("ɑ".into()), parsed_single_token("ɑ")); } +#[test] +fn primitives() { + assert_eq!( + Token::PrimitiveValueName("add_u8".into()), + parsed_single_token("prim%add_u8"), + ); + assert_eq!( + Token::PrimitiveTypeName("U8".into()), + parsed_single_token("prim%U8"), + ); + assert!(Lexer::from("prim%").next().unwrap().is_err()); + assert!(Lexer::from("prim%%").next().unwrap().is_err()); +} + #[test] fn operators_work_as_expected() { 
assert_eq!(Token::OperatorName("-".into()), parsed_single_token("-")); @@ -743,10 +764,119 @@ fn unicode() { let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token); assert_eq!(Some(Token::Character('¾')), next_token()); - let mut lexer = Lexer::from("'\\u{111111111111}'"); + let mut lexer = Lexer::from("'\\u{11111111111111111111111111111}'"); assert!(lexer.next().unwrap().is_err()); let mut lexer = Lexer::from("'\\u{00BE'"); assert!(lexer.next().unwrap().is_err()); let mut lexer = Lexer::from("'\\u00BE}'"); assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'\\u"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'\\u{00Z}'"); + assert!(lexer.next().unwrap().is_err()); +} + +#[test] +fn character_string_errors() { + let mut lexer = Lexer::from("'"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'-\\"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("''"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'ab'"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'\\x'"); + assert!(lexer.next().unwrap().is_err()); + let mut lexer = Lexer::from("'a'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character('a'), + .. + })) + )); + let mut lexer = Lexer::from("'\\0'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character('\0'), + .. + })) + )); + let mut lexer = Lexer::from("'\\a'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character(_), + .. + })) + )); + let mut lexer = Lexer::from("'\\b'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character(_), + .. + })) + )); + let mut lexer = Lexer::from("'\\f'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character(_), + .. 
+ })) + )); + let mut lexer = Lexer::from("'\\n'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character(_), + .. + })) + )); + let mut lexer = Lexer::from("'\\r'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character(_), + .. + })) + )); + let mut lexer = Lexer::from("'\\t'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character(_), + .. + })) + )); + let mut lexer = Lexer::from("'\\v'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character(_), + .. + })) + )); + let mut lexer = Lexer::from("'\\''"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character('\''), + .. + })) + )); + let mut lexer = Lexer::from("'\\\\'"); + assert!(matches!( + lexer.next(), + Some(Ok(LocatedToken { + token: Token::Character('\\'), + .. + })) + )); + + let mut lexer = Lexer::from("\"foo"); + assert!(lexer.next().unwrap().is_err()); } -- 2.53.0 From acbc62a170e5c8d045b0dc011a46f102dfb51127 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Mon, 24 Nov 2025 18:36:29 -0800 Subject: [PATCH 22/33] Try to get a builder going. 
--- .github/workflows/builder.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/workflows/builder.yml diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml new file mode 100644 index 0000000..033b959 --- /dev/null +++ b/.github/workflows/builder.yml @@ -0,0 +1,26 @@ +name: Matrix +on: + - pull_request + - push +jobs: + main: + strategy: + matrix: + rust: + - stable + - beta + - nightly + name: ${{matrix.rust}} + runs-on: x86_64-linux + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@v1 + with: + toolchain: ${{matrix.rust}} + components: rustfmt, clippy + - run: rustup --version + - run: rustc -vV + + - run: cargo clippy -- --deny clippy::pedantic + - run: cargo fmt --all -- --check + - run: cargo test -- 2.53.0 From 60c0dcf35f3c64f97b68c73f7867f5c2c2e5f3d1 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Mon, 24 Nov 2025 18:58:11 -0800 Subject: [PATCH 23/33] Yay CI fails. --- .github/workflows/builder.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index 033b959..d810f10 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -13,6 +13,10 @@ jobs: name: ${{matrix.rust}} runs-on: x86_64-linux steps: + - run: echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event." + - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!" + - run: echo "🔎 The name of your branch is ${{ gitea.ref }} and your repository is ${{ gitea.repository }}." + - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@v1 with: -- 2.53.0 From e6422d22e0436a994f87269cbc887dea1b0e8224 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Mon, 24 Nov 2025 19:04:09 -0800 Subject: [PATCH 24/33] Yay CI fails. 
--- .github/workflows/builder.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index d810f10..03833f0 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -13,9 +13,9 @@ jobs: name: ${{matrix.rust}} runs-on: x86_64-linux steps: - - run: echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event." - - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!" - - run: echo "🔎 The name of your branch is ${{ gitea.ref }} and your repository is ${{ gitea.repository }}." + - run: /bin/echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event." + - run: /bin/echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!" + - run: /bin/echo "🔎 The name of your branch is ${{ gitea.ref }} and your repository is ${{ gitea.repository }}." - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@v1 -- 2.53.0 From dfb88f0dd75aa5bde83e37106efb5f23ce7a4af4 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Mon, 24 Nov 2025 19:22:21 -0800 Subject: [PATCH 25/33] path found --- .github/workflows/builder.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index 03833f0..033b959 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -13,10 +13,6 @@ jobs: name: ${{matrix.rust}} runs-on: x86_64-linux steps: - - run: /bin/echo "🎉 The job was automatically triggered by a ${{ gitea.event_name }} event." - - run: /bin/echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!" - - run: /bin/echo "🔎 The name of your branch is ${{ gitea.ref }} and your repository is ${{ gitea.repository }}." 
- - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@v1 with: -- 2.53.0 From d597cacb2d9183a8b9b79b300497450609bf9d7b Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Mon, 24 Nov 2025 19:26:38 -0800 Subject: [PATCH 26/33] up the versions --- .github/workflows/builder.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index 033b959..df74f30 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -13,8 +13,8 @@ jobs: name: ${{matrix.rust}} runs-on: x86_64-linux steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@v1 + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@master with: toolchain: ${{matrix.rust}} components: rustfmt, clippy -- 2.53.0 From 3fd0ef52b976246f1fc95c83b498222e5a46d5fa Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 26 Nov 2025 15:18:46 -0800 Subject: [PATCH 27/33] Play with strategies and such. --- .github/workflows/builder.yml | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index df74f30..d52d99d 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -1,15 +1,20 @@ name: Matrix on: - - pull_request - - push + pull_request + push: + branches: + - main + - 'releases/**' + jobs: + strategy: + matrix: + rust: + - stable + - beta + - nightly + main: - strategy: - matrix: - rust: - - stable - - beta - - nightly name: ${{matrix.rust}} runs-on: x86_64-linux steps: @@ -21,6 +26,6 @@ jobs: - run: rustup --version - run: rustc -vV - - run: cargo clippy -- --deny clippy::pedantic + - run: cargo clippy - run: cargo fmt --all -- --check - run: cargo test -- 2.53.0 From 0ea2fa03f537ef93e4ee821da86f46330cad714a Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 26 Nov 2025 15:20:45 -0800 Subject: [PATCH 28/33] Foo --- .github/workflows/builder.yml | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index d52d99d..ce28cc4 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -1,10 +1,10 @@ name: Matrix on: - pull_request - push: - branches: - - main - - 'releases/**' + - pull_request + - push: + branches: + - main + - 'releases/**' jobs: strategy: -- 2.53.0 From dd402d13c70d4fbade0ecf3227d04c6f124575b1 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 26 Nov 2025 15:24:26 -0800 Subject: [PATCH 29/33] Foo2 --- .github/workflows/builder.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index ce28cc4..fc59c1a 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -1,10 +1,9 @@ name: Matrix on: - - pull_request - - push: - branches: - - main - - 'releases/**' + push: + branches: + - main + pull_request: jobs: strategy: -- 2.53.0 From 3b24fd1d0532c2b6e888ae54dcf1e688054f285d Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 26 Nov 2025 15:28:45 -0800 Subject: [PATCH 30/33] Foo3 --- .github/workflows/builder.yml | 60 +++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index fc59c1a..a970d57 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -6,25 +6,59 @@ on: pull_request: jobs: - strategy: - matrix: - rust: - - stable - - beta - - nightly + format: + name: Format + runs-on: native:host + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + - run: cargo fmt --all -- --check - main: - name: ${{matrix.rust}} - runs-on: x86_64-linux + clippy: + name: Clippy check + runs-on: native:host + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + - run: cargo 
clippy --all-targets -- -D warnings + + build: + strategy: + matrix: + rust: + - stable + - beta + - nightly + + name: Build - ${{matrix.rust}} + runs-on: native:host steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@master with: toolchain: ${{matrix.rust}} components: rustfmt, clippy - - run: rustup --version - - run: rustc -vV + - run: cargo build - - run: cargo clippy - - run: cargo fmt --all -- --check + test: + strategy: + matrix: + rust: + - stable + - beta + - nightly + + name: Test - ${{matrix.rust}} + runs-on: native:host + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{matrix.rust}} + components: rustfmt, clippy - run: cargo test + -- 2.53.0 From 01d8ff3123ae09f1cab7ec075b263e472c00ce77 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 26 Nov 2025 15:30:38 -0800 Subject: [PATCH 31/33] Foo4 --- .github/workflows/builder.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index a970d57..589bcd6 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -8,7 +8,7 @@ on: jobs: format: name: Format - runs-on: native:host + runs-on: native steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -18,7 +18,7 @@ jobs: clippy: name: Clippy check - runs-on: native:host + runs-on: native steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@stable @@ -35,7 +35,7 @@ jobs: - nightly name: Build - ${{matrix.rust}} - runs-on: native:host + runs-on: native steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@master @@ -53,12 +53,12 @@ jobs: - nightly name: Test - ${{matrix.rust}} - runs-on: native:host + runs-on: native steps: - uses: actions/checkout@v6 - uses: dtolnay/rust-toolchain@master with: toolchain: ${{matrix.rust}} components: rustfmt, clippy - - run: cargo test + - run: cargo build -- 2.53.0 From 
cfeffb7f24e4cd47e8bc6db96eefe11ef9379bf3 Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 26 Nov 2025 15:31:41 -0800 Subject: [PATCH 32/33] Formatting. --- src/syntax/arbitrary.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs index 107346d..2cd6704 100644 --- a/src/syntax/arbitrary.rs +++ b/src/syntax/arbitrary.rs @@ -29,31 +29,24 @@ impl TypeGenerationContext { if !self.available_constructors.is_empty() { for name in self.available_constructors.iter() { - leaf_options.push(Type::Constructor( - Location::manufactured(), - name.clone(), - )); + leaf_options.push(Type::Constructor(Location::manufactured(), name.clone())); } } if !self.available_variables.is_empty() { for name in self.available_variables.iter() { - leaf_options.push(Type::Variable( - Location::manufactured(), - name.clone(), - )); + leaf_options.push(Type::Variable(Location::manufactured(), name.clone())); } } for prim in PRIMITIVE_TYPES.iter() { leaf_options.push(Type::Primitive( - Location::manufactured(), - Name::new(Location::manufactured(), prim.to_string()), + Location::manufactured(), + Name::new(Location::manufactured(), prim.to_string()), )); } - if depth < MAXIMUM_TYPE_DEPTH && runner.rng().random_bool(0.5) { - } + if depth < MAXIMUM_TYPE_DEPTH && runner.rng().random_bool(0.5) {} let index = runner.rng().random_range(0..leaf_options.len()); leaf_options.remove(index) -- 2.53.0 From c66d860542075a29729c39230f9be4b65b761bdb Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 26 Nov 2025 15:55:47 -0800 Subject: [PATCH 33/33] Try generating types. 
--- src/syntax/arbitrary.rs | 46 +++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs index 2cd6704..891447f 100644 --- a/src/syntax/arbitrary.rs +++ b/src/syntax/arbitrary.rs @@ -1,12 +1,9 @@ -use std::fmt::Arguments; - use crate::syntax::ast::{ConstantValue, IntegerWithBase, Type}; use crate::syntax::location::Location; use crate::syntax::name::Name; use itertools::Itertools; use proptest::arbitrary::Arbitrary; use proptest::prelude::{BoxedStrategy, Rng}; -use proptest::prop_oneof; use proptest::strategy::{NewTree, Strategy, ValueTree}; use proptest::test_runner::TestRunner; @@ -24,7 +21,7 @@ pub struct TypeGenerationContext { } impl TypeGenerationContext { - fn generate_type(&mut self, runner: &mut TestRunner, depth: usize) -> Type { + fn generate_type(&self, runner: &mut TestRunner, depth: usize) -> Type { let mut leaf_options = vec![]; if !self.available_constructors.is_empty() { @@ -46,10 +43,31 @@ impl TypeGenerationContext { )); } - if depth < MAXIMUM_TYPE_DEPTH && runner.rng().random_bool(0.5) {} + let mut possibilities = leaf_options.len(); - let index = runner.rng().random_range(0..leaf_options.len()); - leaf_options.remove(index) + if depth < MAXIMUM_TYPE_DEPTH && runner.rng().random_bool(0.5) { + possibilities += 2; + } + + let index = runner.rng().random_range(0..possibilities); + + if index >= leaf_options.len() { + let argument_count = runner.rng().random_range(0..MAXIMUM_TYPE_WIDTH); + let final_type = self.generate_type(runner, depth + 1); + let mut args = vec![]; + + for _ in 0..argument_count { + args.push(self.generate_type(runner, depth + 1)); + } + + if index - leaf_options.len() == 0 { + Type::Function(args, Box::new(final_type)) + } else { + Type::Application(Box::new(final_type), args) + } + } else { + leaf_options.remove(index) + } } } @@ -72,10 +90,6 @@ impl TypeGenerationTree { } } -fn generate_powerset(_: &[Type]) -> Vec> { - vec![] -} 
- fn simplify_type(incoming: &Type) -> Vec { match incoming { Type::Primitive(_, _) => vec![], @@ -275,8 +289,14 @@ impl Strategy for TypeGenerationContext { type Tree = TypeGenerationTree; type Value = Type; - fn new_tree(&self, _runner: &mut TestRunner) -> NewTree { - unimplemented!() + fn new_tree(&self, runner: &mut TestRunner) -> NewTree { + let initial_type = self.generate_type(runner, 0); + + Ok(TypeGenerationTree { + current_value: initial_type, + parent: None, + untried_simplified_items: None, + }) } } -- 2.53.0