diff --git a/.gitignore b/.gitignore index b1604b6..85f98e5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,8 @@ hsrc/Syntax/Lexer.hs hsrc/Syntax/Parser.hs bang + + +# Added by cargo + +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..f08dbe5 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1044 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "ascii-canvas" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" +dependencies = [ + "term", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bang" +version = "0.1.0" +dependencies = [ + "codespan", + "codespan-reporting", + "lalrpop", + "lalrpop-util", + "logos", + "proptest", + "proptest-derive", + "thiserror 2.0.14", +] + +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", +] + +[[package]] +name = 
"bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "codespan" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e4b418d52c9206820a56fc1aa28db73d67e346ba8ba6aa90987e8d6becef7e4" +dependencies = [ + "codespan-reporting", + "serde", +] + +[[package]] +name = "codespan-reporting" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" +dependencies = [ + "serde", + "termcolor", + "unicode-width", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + 
"winapi", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "ena" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1" +dependencies = [ + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", 
+ "r-efi", + "wasi 0.14.2+wasi-0.2.4", +] + +[[package]] +name = "hashbrown" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" + +[[package]] +name = "indexmap" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "lalrpop" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca" +dependencies = [ + "ascii-canvas", + "bit-set 0.5.3", + "ena", + "itertools", + "lalrpop-util", + "petgraph", + "pico-args", + "regex", + "regex-syntax", + "string_cache", + "term", + "tiny-keccak", + "unicode-xid", + "walkdir", +] + +[[package]] +name = "lalrpop-util" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" 
+dependencies = [ + "bitflags", + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "lock_api" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "logos" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff472f899b4ec2d99161c51f60ff7075eeb3097069a36050d8037a6325eb8154" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "192a3a2b90b0c05b27a0b2c43eecdb7c415e29243acc3f89cc8247a5b693045c" +dependencies = [ + "beef", + "fnv", + "lazy_static", + "proc-macro2", + "quote", + "regex-syntax", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605d9697bcd5ef3a42d38efc51541aa3d6a4a25f7ab6d1ed0da5ac632a26b470" +dependencies = [ + "logos-codegen", +] + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "petgraph" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + +[[package]] +name = "proc-macro2" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proptest" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f" +dependencies = [ + "bit-set 0.8.0", + "bit-vec 0.8.0", + "bitflags", + "lazy_static", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "proptest-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "095a99f75c69734802359b682be8daaf8980296731f6470434ea2c652af1dd30" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.3", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.5.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.57", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.60.2", +] + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.57" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +dependencies = [ + "thiserror-impl 1.0.57", +] + +[[package]] +name = "thiserror" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" +dependencies = [ + "thiserror-impl 2.0.14", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + 
+[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "zerocopy" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1848442 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "bang" +version = "0.1.0" +edition = "2024" + +[dependencies] +codespan = "0.12.0" +codespan-reporting = "0.12.0" +lalrpop-util = "0.20.2" +logos = "0.15.1" +proptest = "1.7.0" +proptest-derive = "0.6.0" +thiserror = "2.0.12" + +[build-dependencies] +lalrpop = "0.20.2" diff --git a/build.rs b/build.rs new 
file mode 100644 index 0000000..23c7d3f --- /dev/null +++ b/build.rs @@ -0,0 +1,5 @@ +extern crate lalrpop; + +fn main() { + lalrpop::process_root().unwrap(); +} diff --git a/src/bin/bangc.rs b/src/bin/bangc.rs new file mode 100644 index 0000000..f328e4d --- /dev/null +++ b/src/bin/bangc.rs @@ -0,0 +1 @@ +fn main() {} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..4a39d2c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod syntax; diff --git a/src/syntax.rs b/src/syntax.rs new file mode 100644 index 0000000..a4701b6 --- /dev/null +++ b/src/syntax.rs @@ -0,0 +1,251 @@ +use lalrpop_util::lalrpop_mod; + +mod error; +lalrpop_mod!( + #[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)] + parser, + "/syntax/parser.rs" +); +pub mod tokens; + +#[cfg(test)] +use crate::syntax::error::ParserError; +#[cfg(test)] +use crate::syntax::parser::*; +#[cfg(test)] +use crate::syntax::tokens::Lexer; +use codespan_reporting::diagnostic::Label; +use proptest_derive::Arbitrary; +use std::cmp::{max, min}; +use std::fmt::Debug; +use std::ops::Range; + +#[derive(Debug)] +pub struct Location { + file_id: usize, + span: Range, +} + +impl Location { + pub fn new(file_id: usize, span: Range) -> Self { + Location { file_id, span } + } + + pub fn extend_to(&self, other: &Location) -> Location { + assert_eq!(self.file_id, other.file_id); + Location { + file_id: self.file_id, + span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end), + } + } + + pub fn primary_label(&self) -> Label { + Label::primary(self.file_id, self.span.clone()) + } + + pub fn secondary_label(&self) -> Label { + Label::secondary(self.file_id, self.span.clone()) + } +} + +pub struct Module { + definitions: Vec, +} + +pub struct Definition { + location: Location, + export: ExportClass, + type_restrictions: TypeRestrictions, + definition: Def, +} + +pub enum Def { + Enumeration(EnumerationDef), + Structure(StructureDef), + Function(FunctionDef), + 
Value(ValueDef), +} + +impl Def { + fn location(&self) -> &Location { + match self { + Def::Enumeration(def) => &def.location, + Def::Structure(def) => &def.location, + Def::Function(def) => &def.location, + Def::Value(def) => &def.location, + } + } +} + +pub struct EnumerationDef { + location: Location, + options: Vec, +} + +pub struct EnumerationVariant { + location: Location, + name: String, + arguments: Vec, +} + +pub struct StructureDef { + name: String, + location: Location, + fields: Vec, +} + +pub struct StructureField { + name: String, + field_type: Type, +} + +pub struct FunctionDef { + name: String, + location: Location, + arguments: Vec, + return_type: Option, + body: Vec, +} + +pub struct FunctionArg { + name: String, + arg_type: Option, +} + +pub struct ValueDef { + name: String, + location: Location, + value: Value, +} + +pub enum ExportClass { + Public, + Private, +} + +pub enum Statement { + Binding(BindingStmt), +} + +pub struct BindingStmt { + location: Location, + mutable: bool, + variable: String, + value: Expression, +} + +pub enum Expression { + Value(Value), +} + +pub struct TypeRestrictions { + restrictions: Vec, +} + +impl TypeRestrictions { + fn empty() -> Self { + TypeRestrictions { + restrictions: vec![], + } + } +} + +pub struct TypeRestriction { + location: Location, + class: String, + variables: Vec, +} + +pub enum Type { + Constructor(Location, String), + Variable(Location, String), + Primitive(Location, String), + Application(Box, Vec), + Function(Vec, Box), +} + +pub enum Value { + Constant(ConstantValue), +} + +pub enum ConstantValue { + Integer(Location, IntegerWithBase), + Character(Location, char), + String(Location, String), +} + +#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] +pub struct IntegerWithBase { + #[proptest(strategy = "proptest::prop_oneof![ \ + proptest::strategy::Just(None), \ + proptest::strategy::Just(Some(2)), \ + proptest::strategy::Just(Some(8)), \ + proptest::strategy::Just(Some(10)), \ + 
/// Checks that each kind of constant literal makes it through the lexer and
/// the `ConstantValue` grammar rule with the expected payload. Locations are
/// deliberately ignored.
#[test]
fn can_parse_constants() {
    // Lex and parse `source` as one constant, always under file id 0.
    let parse_constant = |source: &str| {
        let tokens = Lexer::from(source).map(|item| {
            item.map_err(|error| ParserError::LexerError { file_id: 0, error })
        });

        ConstantValueParser::new().parse(0, tokens)
    };

    // True when `source` parses to exactly the integer constant `expected`.
    let parses_to_integer = |source: &str, expected: IntegerWithBase| {
        matches!(
            parse_constant(source),
            Ok(ConstantValue::Integer(_, found)) if found == expected
        )
    };

    assert!(parses_to_integer(
        "16",
        IntegerWithBase {
            base: None,
            value: 16,
        }
    ));
    assert!(parses_to_integer(
        "0x10",
        IntegerWithBase {
            base: Some(16),
            value: 16,
        }
    ));
    assert!(parses_to_integer(
        "0o20",
        IntegerWithBase {
            base: Some(8),
            value: 16,
        }
    ));
    assert!(parses_to_integer(
        "0b10000",
        IntegerWithBase {
            base: Some(2),
            value: 16,
        }
    ));

    assert!(matches!(
        parse_constant("\"foo\""),
        Ok(ConstantValue::String(_, text)) if text == "foo"
    ));
    assert!(matches!(
        parse_constant("'f'"),
        Ok(ConstantValue::Character(_, 'f'))
    ));
}
span: Range, char: char }, + + #[error("Unfinished character constant found at {span:?}")] + UnfinishedCharacter { span: Range }, + + #[error("Unfinished string constant found at {span:?}")] + UnfinishedString { span: Range }, + + #[error("Character {char:?} has some extra bits at the end at {span:?}")] + OverlongCharacter { char: char, span: Range }, + + #[error("Unknown escaped character {escaped_char:?} at {span:?}")] + UnknownEscapeCharacter { + escaped_char: char, + span: Range, + }, + + #[error("Invalid unicode escape sequence at {span:?}")] + InvalidUnicode { span: Range }, +} + +impl LexerError { + pub fn to_triple(&self) -> (usize, Result, usize) { + match self { + LexerError::IllegalControlCharacter { offset } => (*offset, Err(self.clone()), *offset), + LexerError::IllegalPrimitive { span } => (span.start, Err(self.clone()), span.end), + LexerError::IllegalPrimitiveCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::UnfinishedCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::UnfinishedString { span, .. } => (span.start, Err(self.clone()), span.end), + LexerError::OverlongCharacter { span, .. } => (span.start, Err(self.clone()), span.end), + LexerError::UnknownEscapeCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::InvalidUnicode { span, .. 
} => (span.start, Err(self.clone()), span.end), + } + } +} + +//impl From for Diagnostic { +// fn from(value: LexerError) -> Self { +// match value { +// LexerError::IllegalControlCharacter { file, offset } => Diagnostic::error() +// .with_code("E1001") +// .with_message("Illegal control character in input stream") +// .with_label(Label::primary(file, offset..offset).with_message("illegal character")), +// +// LexerError::IllegalPrimitive { file, span } => Diagnostic::error() +// .with_code("E1002") +// .with_message("Illegal primitive; it cut off before it could finish") +// .with_label( +// Label::primary(file, span) +// .with_message("should be at least one character after the %"), +// ), +// +// LexerError::IllegalPrimitiveCharacter { file, span, char } => Diagnostic::error() +// .with_code("E1003") +// .with_message(format!("Illegal character {char:?} in primitive")) +// .with_label(Label::primary(file, span).with_message("illegal character")), +// +// LexerError::UnfinishedCharacter { file, span } => Diagnostic::error() +// .with_code("E1004") +// .with_message("Unfinished character in input stream.") +// .with_label(Label::primary(file, span).with_message("unfinished character")), +// +// LexerError::UnfinishedString { file, span } => Diagnostic::error() +// .with_code("E1005") +// .with_message("Unfinished string in input stream.") +// .with_label(Label::primary(file, span).with_message("unfinished string")), +// +// LexerError::OverlongCharacter { file, char, span } => Diagnostic::error() +// .with_code("E1006") +// .with_message(format!( +// "Character {char:?} has some extra bits at the end of it." 
+// )) +// .with_label(Label::primary(file, span).with_message("overlong character")), +// +// LexerError::UnknownEscapeCharacter { +// file, +// escaped_char, +// span, +// } => Diagnostic::error() +// .with_code("E1007") +// .with_message(format!("Unknown escape character {escaped_char:?}.")) +// .with_label(Label::primary(file, span).with_message("unknown character")), +// +// LexerError::InvalidUnicode { file, span } => Diagnostic::error() +// .with_code("E1008") +// .with_message("Unknown or invalid unicode escape sequence.") +// .with_label(Label::primary(file, span).with_message("escape sequence")), +// } +// } +//} diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop new file mode 100644 index 0000000..1bf890e --- /dev/null +++ b/src/syntax/parser.lalrpop @@ -0,0 +1,39 @@ +use crate::syntax::*; +use crate::syntax::error::ParserError; +use crate::syntax::tokens::*; + +grammar(file_id: usize); + +extern { + type Location = usize; + type Error = ParserError; + + enum Token { + "(" => Token::OpenParen, + ")" => Token::CloseParen, + "[" => Token::OpenSquare, + "]" => Token::CloseSquare, + "{" => Token::OpenBrace, + "}" => Token::CloseBrace, + ";" => Token::Semi, + ":" => Token::Colon, + "," => Token::Comma, + "`" => Token::BackTick, + "\\" => Token::Lambda(_), + + "" => Token::TypeName(), + "" => Token::ValueName(), + "" => Token::OperatorName(), + "" => Token::PrimitiveTypeName(), + "" => Token::PrimitiveValueName(), + "" => Token::Integer(), + "" => Token::Character(), + "" => Token::String(), + } +} + +pub ConstantValue: ConstantValue = { + "> => ConstantValue::Integer(Location::new(file_id, s..e), x), + "> => ConstantValue::Character(Location::new(file_id, s..e), x), + "> => ConstantValue::String(Location::new(file_id, s..e), x), +} diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs new file mode 100644 index 0000000..9399b9e --- /dev/null +++ b/src/syntax/tokens.rs @@ -0,0 +1,609 @@ +use crate::syntax::IntegerWithBase; +use 
/// A single token of the input stream, so that the parser can work with
/// something more structured than raw characters.
///
/// The [`std::fmt::Display`] implementation is designed to round-trip, so those
/// needing a more regular or descriptive option should consider using the
/// [`std::fmt::Debug`] implementation instead.
#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)]
pub enum Token {
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `[`
    OpenSquare,
    /// `]`
    CloseSquare,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `;`
    Semi,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// A single backtick.
    BackTick,
    /// A lambda introducer; the flag is `true` when it was written as `λ` and
    /// `false` when it was written as a backslash.
    Lambda(bool),

    /// A name whose first character is uppercase. The regex only shapes the
    /// values generated for property tests.
    TypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String),
    /// A name whose first character is alphabetic (but not uppercase) or `_`.
    /// The regex only shapes the values generated for property tests.
    ValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String),
    /// A name made of symbol characters. The regex only shapes the values
    /// generated for property tests.
    OperatorName(
        #[proptest(
            regex = r"[\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|][\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|_]*"
        )]
        String,
    ),

    /// The name following a `prim%` prefix when that name starts with an
    /// uppercase character; the prefix itself is not stored.
    PrimitiveTypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String),
    /// The name following a `prim%` prefix when that name starts with a
    /// lowercase character or `_`; the prefix itself is not stored.
    PrimitiveValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String),

    /// An integer literal together with the base prefix it was written with,
    /// if any.
    Integer(IntegerWithBase),
    /// A character literal, with any escape already decoded.
    Character(char),
    /// A string literal, with any escapes already decoded.
    String(String),
}
base, value }) => match base { + None => write!(f, "{value}"), + Some(2) => write!(f, "0b{value:b}"), + Some(8) => write!(f, "0o{value:o}"), + Some(10) => write!(f, "0d{value}"), + Some(16) => write!(f, "0x{value:x}"), + Some(base) => write!(f, ""), + }, + Token::Character(c) => write!(f, "{c:?}"), + Token::String(s) => write!(f, "{s:?}"), + } + } +} + +#[allow(private_interfaces)] +pub enum Lexer<'a> { + Working(LexerState<'a>), + Errored(LexerError), + Done(usize), +} + +struct LexerState<'a> { + stream: CharIndices<'a>, + buffer: Option<(usize, char)>, +} + +impl<'a> From<&'a str> for Lexer<'a> { + fn from(value: &'a str) -> Self { + println!("LEXING '{value}'"); + Lexer::Working(LexerState { + stream: value.char_indices(), + buffer: None, + }) + } +} + +impl<'a> Lexer<'a> { + pub fn new(stream: &'a str) -> Self { + Lexer::Working(LexerState { + stream: stream.char_indices(), + buffer: None, + }) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Result<(usize, Token, usize), LexerError>; + + fn next(&mut self) -> Option { + match self { + Lexer::Done(_) => None, + Lexer::Errored(e) => Some(Err(e.clone())), + Lexer::Working(state) => match state.next_token() { + Err(e) => { + println!("ERROR: {e}"); + *self = Lexer::Errored(e.clone()); + Some(Err(e)) + } + + Ok(None) => { + println!("LEXER DONE"); + *self = Lexer::Done(state.stream.offset()); + None + } + + Ok(Some((start, token, end))) => { + println!("TOKEN: {:?}", token); + Some(Ok((start, token, end))) + } + }, + } + } +} + +impl<'a> LexerState<'a> { + fn next_char(&mut self) -> Option<(usize, char)> { + let result = self.buffer.take().or_else(|| self.stream.next()); + println!("next_char() -> {result:?}"); + result + } + + fn stash_char(&mut self, idx: usize, c: char) { + println!("stash_char({idx}, {c})"); + assert!(self.buffer.is_none()); + self.buffer = Some((idx, c)); + } + + fn next_token(&mut self) -> Result, LexerError> { + while let Some((token_start_offset, char)) = self.next_char() { + if 
char.is_whitespace() { + continue; + } + + let simple_response = + |token| Ok(Some((token_start_offset, token, self.stream.offset()))); + + match char { + '(' => return simple_response(Token::OpenParen), + ')' => return simple_response(Token::CloseParen), + '[' => return simple_response(Token::OpenSquare), + ']' => return simple_response(Token::CloseSquare), + '{' => return simple_response(Token::OpenBrace), + '}' => return simple_response(Token::CloseBrace), + ';' => return simple_response(Token::Semi), + ':' => return simple_response(Token::Colon), + ',' => return simple_response(Token::Comma), + '`' => return simple_response(Token::BackTick), + '\\' => return simple_response(Token::Lambda(false)), + 'λ' => return simple_response(Token::Lambda(true)), + + '0' => return self.starts_with_zero(token_start_offset), + '\'' => return self.starts_with_single(token_start_offset), + '\"' => return self.starts_with_double(token_start_offset), + _ => {} + } + + if let Some(value) = char.to_digit(10) { + return self.parse_integer(token_start_offset, 10, None, value as u64); + } + + if char.is_uppercase() { + return self.parse_identifier( + token_start_offset, + char.into(), + |c| c.is_alphanumeric() || c == '_', + Token::TypeName, + ); + } + + if char.is_alphabetic() || char == '_' { + return self.parse_identifier( + token_start_offset, + char.into(), + |c| c.is_alphanumeric() || c == '_', + Token::ValueName, + ); + } + + if !char.is_alphanumeric() && !char.is_whitespace() && !char.is_control() { + return self.parse_identifier( + token_start_offset, + char.into(), + |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(), + Token::OperatorName, + ); + } + } + + Ok(None) + } + + fn starts_with_zero( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + match self.next_char() { + None => { + let token = Token::Integer(IntegerWithBase { + base: None, + value: 0, + }); + Ok(Some((token_start_offset, token, self.stream.offset()))) + } + + Some((_, 
'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0), + Some((_, 'o')) => self.parse_integer(token_start_offset, 8, Some(8), 0), + Some((_, 'd')) => self.parse_integer(token_start_offset, 10, Some(10), 0), + Some((_, 'x')) => self.parse_integer(token_start_offset, 16, Some(16), 0), + + Some((offset, c)) => { + if let Some(value) = c.to_digit(10) { + self.parse_integer(token_start_offset, 10, None, value as u64) + } else { + self.stash_char(offset, c); + let token = Token::Integer(IntegerWithBase { + base: None, + value: 0, + }); + Ok(Some((token_start_offset, token, offset))) + } + } + } + } + + fn parse_integer( + &mut self, + token_start_offset: usize, + base: u32, + provided_base: Option, + mut value: u64, + ) -> Result, LexerError> { + let mut end_offset = self.stream.offset(); + + while let Some((offset, c)) = self.next_char() { + end_offset = offset; + if let Some(digit) = c.to_digit(base) { + value = (value * (base as u64)) + (digit as u64); + } else { + self.stash_char(offset, c); + break; + } + } + + let token = Token::Integer(IntegerWithBase { + base: provided_base, + value, + }); + + Ok(Some((token_start_offset, token, end_offset))) + } + + fn parse_identifier( + &mut self, + token_start_offset: usize, + mut identifier: String, + mut allowed_character: fn(char) -> bool, + mut builder: fn(String) -> Token, + ) -> Result, LexerError> { + let mut end_offset = self.stream.offset(); + + while let Some((offset, c)) = self.next_char() { + end_offset = offset; + + if allowed_character(c) { + identifier.push(c); + } else if identifier == "prim" && c == '%' { + identifier = String::new(); + allowed_character = |c| c.is_alphanumeric() || c == '_'; + match self.next_char() { + None => { + return Err(LexerError::IllegalPrimitive { + span: token_start_offset..end_offset, + }); + } + + Some((_, char)) => { + if char.is_uppercase() { + identifier.push(char); + builder = Token::PrimitiveTypeName; + } else if char.is_lowercase() || char == '_' { + 
identifier.push(char); + builder = Token::PrimitiveValueName; + } else { + return Err(LexerError::IllegalPrimitiveCharacter { + span: token_start_offset..end_offset, + char, + }); + } + } + } + } else { + self.stash_char(offset, c); + break; + } + } + + Ok(Some((token_start_offset, builder(identifier), end_offset))) + } + + fn starts_with_single( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + let Some((_, mut char)) = self.next_char() else { + return Err(LexerError::UnfinishedCharacter { + span: token_start_offset..self.stream.offset(), + }); + }; + + if char == '\\' { + char = self.get_escaped_character(token_start_offset)?; + } + + let Some((idx, finish_char)) = self.next_char() else { + return Err(LexerError::UnfinishedCharacter { + span: token_start_offset..self.stream.offset(), + }); + }; + + if finish_char != '\'' { + return Err(LexerError::OverlongCharacter { + char, + span: token_start_offset..self.stream.offset(), + }); + } + + Ok(Some((token_start_offset, Token::Character(char), idx))) + } + + fn get_escaped_character(&mut self, token_start_offset: usize) -> Result { + let Some((idx, escaped_char)) = self.next_char() else { + return Err(LexerError::UnfinishedCharacter { + span: token_start_offset..self.stream.offset(), + }); + }; + + match escaped_char { + '0' => Ok('\0'), + 'a' => Ok('\u{0007}'), + 'b' => Ok('\u{0008}'), + 'f' => Ok('\u{000C}'), + 'n' => Ok('\n'), + 'r' => Ok('\r'), + 't' => Ok('\t'), + 'u' => self.get_unicode_sequence(idx), + 'v' => Ok('\u{000B}'), + '\'' => Ok('\''), + '"' => Ok('"'), + '\\' => Ok('\\'), + _ => Err(LexerError::UnknownEscapeCharacter { + escaped_char, + span: idx..self.stream.offset(), + }), + } + } + + fn get_unicode_sequence(&mut self, token_start_offset: usize) -> Result { + let Some((_, char)) = self.next_char() else { + return Err(LexerError::InvalidUnicode { + span: token_start_offset..self.stream.offset(), + }); + }; + + if char != '{' { + return Err(LexerError::InvalidUnicode { + span: 
token_start_offset..self.stream.offset(), + }); + } + + let mut value = 0; + + while let Some((idx, char)) = self.next_char() { + if let Some(digit) = char.to_digit(16) { + value = (value * 16) + digit; + continue; + } + + if char == '}' { + if let Some(char) = char::from_u32(value) { + return Ok(char); + } else { + return Err(LexerError::InvalidUnicode { + span: token_start_offset..idx, + }); + } + } + + return Err(LexerError::InvalidUnicode { + span: token_start_offset..self.stream.offset(), + }); + } + + Err(LexerError::InvalidUnicode { + span: token_start_offset..self.stream.offset(), + }) + } + + fn starts_with_double( + &mut self, + token_start_offset: usize, + ) -> Result, LexerError> { + let mut result = String::new(); + + while let Some((idx, char)) = self.next_char() { + match char { + '"' => return Ok(Some((token_start_offset, Token::String(result), idx))), + + '\\' => result.push(self.get_escaped_character(idx)?), + + _ => result.push(char), + } + } + + Err(LexerError::UnfinishedString { + span: token_start_offset..self.stream.offset(), + }) + } +} + +proptest::proptest! 
/// Lexes `s` and returns its one and only token, discarding the span.
///
/// # Panics
///
/// Panics if `s` yields no token at all, if the first item is a lexer error,
/// or if anything follows the first token.
#[cfg(test)]
fn parsed_single_token(s: &str) -> Token {
    let mut tokens = Lexer::from(s);

    // `unwrap_or_else` keeps the message from being formatted on the
    // (normal) path where a token is present.
    let (_, token, _) = tokens
        .next()
        .unwrap_or_else(|| panic!("Can get at least one token from {s:?}"))
        .expect("Can get a valid token.");

    assert!(
        tokens.next().is_none(),
        "Should only get one token from {s:?}"
    );

    token
}
/// Checks that names and operators are split apart correctly, with and
/// without whitespace between them.
#[test]
fn can_separate_pieces() {
    // Lexes `source` to exhaustion, keeping only the tokens themselves.
    fn tokens_of(source: &str) -> Vec<Token> {
        Lexer::from(source)
            .map(|item| item.expect("Can read valid token").1)
            .collect()
    }

    let value = |name: &str| Token::ValueName(name.into());
    let operator = |name: &str| Token::OperatorName(name.into());

    assert_eq!(
        vec![value("a"), operator("-"), value("b")],
        tokens_of("a-b")
    );

    assert_eq!(
        vec![value("a"), operator("--"), value("b")],
        tokens_of("a--b")
    );

    assert_eq!(
        vec![value("a"), operator("-"), operator("-"), value("b")],
        tokens_of("a - -b")
    );
}