diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml new file mode 100644 index 0000000..589bcd6 --- /dev/null +++ b/.github/workflows/builder.yml @@ -0,0 +1,64 @@ +name: Matrix +on: + push: + branches: + - main + pull_request: + +jobs: + format: + name: Format + runs-on: native + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + - run: cargo fmt --all -- --check + + clippy: + name: Clippy check + runs-on: native + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + - run: cargo clippy --all-targets -- -D warnings + + build: + strategy: + matrix: + rust: + - stable + - beta + - nightly + + name: Build - ${{matrix.rust}} + runs-on: native + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{matrix.rust}} + components: rustfmt, clippy + - run: cargo build + + test: + strategy: + matrix: + rust: + - stable + - beta + - nightly + + name: Test - ${{matrix.rust}} + runs-on: native + steps: + - uses: actions/checkout@v6 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{matrix.rust}} + components: rustfmt, clippy + - run: cargo test + diff --git a/.gitignore b/.gitignore index b1604b6..70e8cf0 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,9 @@ hsrc/Syntax/Lexer.hs hsrc/Syntax/Parser.hs bang + + +# Added by cargo +/proptest-regressions +/target +.aider* diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..352c0e2 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,690 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "ariadne" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f5e3dca4e09a6f340a61a0e9c7b61e030c69fc27bf29d73218f7e5e3b7638f" +dependencies = [ + "concolor", + "unicode-width 0.1.14", + "yansi", +] + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bang" +version = "0.1.0" +dependencies = [ + "ariadne", + "internment", + "itertools", + "memmap2", + "pretty", + "proptest", + "proptest-derive", + "thiserror", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + 
+[[package]] +name = "bitflags" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "concolor" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b946244a988c390a94667ae0e3958411fa40cc46ea496a929b263d883f5f9c3" +dependencies = [ + "bitflags 1.3.2", + "concolor-query", + "is-terminal", +] + +[[package]] +name = "concolor-query" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" +dependencies = [ + "windows-sys 0.45.0", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + +[[package]] +name = "internment" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "636d4b0f6a39fd684effe2a73f5310df16a3fa7954c26d36833e98f44d1977a2" +dependencies = [ + "ahash", + "dashmap", + "hashbrown 0.15.5", + "once_cell", +] + +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "itertools" +version = 
"0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "libc" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "memmap2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +dependencies = [ + "libc", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 
+dependencies = [ + "zerocopy", +] + +[[package]] +name = "pretty" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d22152487193190344590e4f30e219cf3fe140d9e7a3fdb683d82aa2c5f4156" +dependencies = [ + "arrayvec", + "termcolor", + "typed-arena", + "unicode-width 0.2.2", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "proptest" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.10.0", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "proptest-derive" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "095a99f75c69734802359b682be8daaf8980296731f6470434ea2c652af1dd30" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.10.0", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "syn" +version = "2.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + +[[package]] 
+name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = 
"windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "yansi" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" + +[[package]] +name = "zerocopy" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..35c2c6d --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "bang" +version = "0.1.0" +edition = "2024" + +[dependencies] +ariadne = { version = "0.5.1", features = ["auto-color"] } +internment = { version = "0.8.6", features = ["arc", "arena"] } +itertools = "0.14.0" +memmap2 = "0.9.8" +pretty = { version = "0.12.5", features = ["termcolor"] } +proptest = "1.7.0" +proptest-derive = "0.6.0" +thiserror = "2.0.12" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage)'] } diff --git a/src/bin/bangc.rs b/src/bin/bangc.rs new file mode 100644 index 0000000..f328e4d --- /dev/null +++ b/src/bin/bangc.rs @@ -0,0 +1 @@ +fn main() {} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..4a39d2c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod syntax; diff --git a/src/syntax.rs b/src/syntax.rs new file mode 100644 index 
0000000..9edde17 --- /dev/null +++ b/src/syntax.rs @@ -0,0 +1,17 @@ +mod arbitrary; +mod ast; +mod error; +mod location; +mod name; +mod parse; +#[cfg(test)] +mod parser_tests; +mod print; +mod tokens; +mod universe; + +pub use crate::syntax::error::ParserError; +pub use ast::*; +pub use location::{Located, Location}; +pub use name::Name; +pub use universe::*; diff --git a/src/syntax/arbitrary.rs b/src/syntax/arbitrary.rs new file mode 100644 index 0000000..891447f --- /dev/null +++ b/src/syntax/arbitrary.rs @@ -0,0 +1,561 @@ +use crate::syntax::ast::{ConstantValue, IntegerWithBase, Type}; +use crate::syntax::location::Location; +use crate::syntax::name::Name; +use itertools::Itertools; +use proptest::arbitrary::Arbitrary; +use proptest::prelude::{BoxedStrategy, Rng}; +use proptest::strategy::{NewTree, Strategy, ValueTree}; +use proptest::test_runner::TestRunner; + +const MAXIMUM_TYPE_DEPTH: usize = 5; +const MAXIMUM_TYPE_WIDTH: usize = 5; +const MAXIMUM_STRING_SIZE: usize = 32; +const PRIMITIVE_TYPES: &[&str] = &[ + "Char", "String", "I8", "I16", "I32", "I64", "U8", "U16", "U32", "U64", +]; + +#[derive(Debug, Default)] +pub struct TypeGenerationContext { + available_constructors: Vec, + available_variables: Vec, +} + +impl TypeGenerationContext { + fn generate_type(&self, runner: &mut TestRunner, depth: usize) -> Type { + let mut leaf_options = vec![]; + + if !self.available_constructors.is_empty() { + for name in self.available_constructors.iter() { + leaf_options.push(Type::Constructor(Location::manufactured(), name.clone())); + } + } + + if !self.available_variables.is_empty() { + for name in self.available_variables.iter() { + leaf_options.push(Type::Variable(Location::manufactured(), name.clone())); + } + } + + for prim in PRIMITIVE_TYPES.iter() { + leaf_options.push(Type::Primitive( + Location::manufactured(), + Name::new(Location::manufactured(), prim.to_string()), + )); + } + + let mut possibilities = leaf_options.len(); + + if depth < MAXIMUM_TYPE_DEPTH 
&& runner.rng().random_bool(0.5) { + possibilities += 2; + } + + let index = runner.rng().random_range(0..possibilities); + + if index >= leaf_options.len() { + let argument_count = runner.rng().random_range(0..MAXIMUM_TYPE_WIDTH); + let final_type = self.generate_type(runner, depth + 1); + let mut args = vec![]; + + for _ in 0..argument_count { + args.push(self.generate_type(runner, depth + 1)); + } + + if index - leaf_options.len() == 0 { + Type::Function(args, Box::new(final_type)) + } else { + Type::Application(Box::new(final_type), args) + } + } else { + leaf_options.remove(index) + } + } +} + +#[derive(Clone)] +pub struct TypeGenerationTree { + current_value: Type, + parent: Option>, + untried_simplified_items: Option>, +} + +impl TypeGenerationTree { + /// Create a new type generation tree based on the given + /// initial value. + pub fn new(initial_value: Type) -> TypeGenerationTree { + TypeGenerationTree { + current_value: initial_value, + parent: None, + untried_simplified_items: None, + } + } +} + +fn simplify_type(incoming: &Type) -> Vec { + match incoming { + Type::Primitive(_, _) => vec![], + Type::Constructor(_, _) => vec![], + Type::Variable(_, _) => vec![], + Type::Function(arg_types, ret_type) => { + let simplified_return_types = simplify_type(ret_type.as_ref()); + + // we do the following as a set of steps, choosing to go deep rather than + // broad immediately. So this works as follows: + // + // 1. If there are simplifications for the return type, then just + // return variations with the simplified return type. + // 2. If there are simplifications for the first argument, then + // just return variations with the first argument simplified. + // 3. Repeat for each of the arguments. + // 4. At this point, all the subtypes are as simple as they can + // be, so return a series of function types with fewer arguments. + // 5. If we are a function with no arguments, then just return + // the return type. 
+ if !simplified_return_types.is_empty() { + return simplified_return_types + .into_iter() + .map(|ret| Type::Function(arg_types.clone(), Box::new(ret))) + .collect(); + } + + // now check the arguments, and see if we can simplify them in a + // better way. + for idx in 0..arg_types.len() { + let simplified_arguments = simplify_type(&arg_types[idx]); + + if simplified_arguments.is_empty() { + continue; + } + + let mut new_function_types = vec![]; + + for simplified_arg in simplified_arguments.into_iter() { + let mut new_args = vec![]; + + for item in &arg_types[0..idx] { + new_args.push(item.clone()); + } + new_args.push(simplified_arg); + for item in &arg_types[idx + 1..arg_types.len()] { + new_args.push(item.clone()); + } + + new_function_types.push(Type::Function(new_args, ret_type.clone())); + } + + if !new_function_types.is_empty() { + return new_function_types; + } + } + + // ok, all of the arguments and the return type are already as + // simple as they can be, so let's see if we can reduce the number + // of arguments. + let mut new_types = vec![]; + for args in arg_types.iter().powerset() { + if args.len() != arg_types.len() { + new_types.push(Type::Function( + args.into_iter().cloned().collect(), + ret_type.clone(), + )); + } + } + + if new_types.is_empty() { + vec![ret_type.as_ref().clone()] + } else { + new_types + } + } + + Type::Application(constructor_type, arg_types) => { + // much like functions, we're going to try to simplify the constructor, + // then we'll try to simplify the arguments, then we'll try to remove + // arguments. + let simplified_constructor = simplify_type(constructor_type.as_ref()); + + if !simplified_constructor.is_empty() { + return simplified_constructor + .into_iter() + .map(|c| Type::Application(Box::new(c), arg_types.clone())) + .collect(); + } + + // now check the arguments, and see if we can simplify them in a + // better way. 
+ for idx in 0..arg_types.len() { + let simplified_arguments = simplify_type(&arg_types[idx]); + + if simplified_arguments.is_empty() { + continue; + } + + let mut new_appl_types = vec![]; + + for simplified_arg in simplified_arguments.into_iter() { + let mut new_args = vec![]; + + for item in &arg_types[0..idx] { + new_args.push(item.clone()); + } + new_args.push(simplified_arg); + for item in &arg_types[idx + 1..arg_types.len()] { + new_args.push(item.clone()); + } + + new_appl_types.push(Type::Application(constructor_type.clone(), new_args)); + } + + if !new_appl_types.is_empty() { + return new_appl_types; + } + } + + // and now we'll try to reduce types. + let mut new_types = vec![]; + for args in arg_types.iter().powerset() { + if args.len() != arg_types.len() { + new_types.push(Type::Application( + constructor_type.clone(), + args.into_iter().cloned().collect(), + )); + } + } + + if new_types.is_empty() { + vec![constructor_type.as_ref().clone()] + } else { + new_types + } + } + } +} + +impl ValueTree for TypeGenerationTree { + type Value = Type; + + fn current(&self) -> Self::Value { + self.current_value.clone() + } + + fn simplify(&mut self) -> bool { + match self.untried_simplified_items.as_mut() { + None => { + let mut simplified = simplify_type(&self.current_value) + .into_iter() + .map(|current_value| TypeGenerationTree { + current_value, + parent: Some(Box::new(self.clone())), + + untried_simplified_items: None, + }) + .collect::>(); + + match simplified.pop() { + None => { + self.untried_simplified_items = Some(simplified); + false + } + + Some(next_tree) => { + self.untried_simplified_items = Some(simplified); + *self = next_tree; + true + } + } + } + + Some(untried_simplifieds) => match untried_simplifieds.pop() { + None => false, + Some(x) => { + *self = x; + true + } + }, + } + } + + fn complicate(&mut self) -> bool { + match self.parent.take() { + None => false, + Some(x) => { + *self = *x; + true + } + } + } +} + +impl Strategy for 
TypeGenerationContext { + type Tree = TypeGenerationTree; + type Value = Type; + + fn new_tree(&self, runner: &mut TestRunner) -> NewTree { + let initial_type = self.generate_type(runner, 0); + + Ok(TypeGenerationTree { + current_value: initial_type, + parent: None, + untried_simplified_items: None, + }) + } +} + +impl Arbitrary for Type { + type Parameters = TypeGenerationContext; + type Strategy = TypeGenerationContext; + + fn arbitrary_with(_context: Self::Parameters) -> Self::Strategy { + unimplemented!() + } +} + +#[derive(Default)] +pub enum LegalConstantType { + #[default] + Any, + String, + Char, + Number, +} + +impl Arbitrary for ConstantValue { + type Parameters = LegalConstantType; + type Strategy = BoxedStrategy; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + match args { + LegalConstantType::Char => char::arbitrary() + .prop_map(|x| ConstantValue::Character(Location::manufactured(), x)) + .boxed(), + + LegalConstantType::String => { + proptest::collection::vec(proptest::char::any(), MAXIMUM_STRING_SIZE) + .prop_map(|x| { + ConstantValue::String(Location::manufactured(), String::from_iter(x)) + }) + .boxed() + } + + LegalConstantType::Number => { + let value_strat = u64::arbitrary(); + let base_strat = proptest::prop_oneof![ + proptest::strategy::Just(None), + proptest::strategy::Just(Some(2)), + proptest::strategy::Just(Some(8)), + proptest::strategy::Just(Some(10)), + proptest::strategy::Just(Some(16)), + ]; + + (value_strat, base_strat) + .prop_map(|(value, base)| { + ConstantValue::Integer( + Location::manufactured(), + IntegerWithBase { base, value }, + ) + }) + .boxed() + } + + LegalConstantType::Any => proptest::prop_oneof![ + Self::arbitrary_with(LegalConstantType::Char), + Self::arbitrary_with(LegalConstantType::String), + Self::arbitrary_with(LegalConstantType::Number), + ] + .boxed(), + } + } +} + +#[cfg(test)] +mod simplifiers { + use super::*; + + #[test] + fn types() { + let loc = Location::manufactured(); + let foo = 
Name::new(loc.clone(), "Foo"); + let primint = Type::Primitive(loc.clone(), Name::new(loc.clone(), "Int")); + let primchar = Type::Primitive(loc.clone(), Name::new(loc.clone(), "Char")); + let primstr = Type::Primitive(loc.clone(), Name::new(loc.clone(), "String")); + + assert_eq!( + simplify_type(&Type::Constructor(loc.clone(), foo.clone())), + vec![] + ); + assert_eq!( + simplify_type(&Type::Variable(loc.clone(), foo.clone())), + vec![] + ); + assert_eq!( + simplify_type(&Type::Primitive(loc.clone(), foo.clone())), + vec![] + ); + + assert_eq!( + simplify_type(&Type::Function(vec![], Box::new(primint.clone()))), + vec![primint.clone()] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone()], + Box::new(primint.clone()) + )), + vec![ + Type::Function(vec![], Box::new(primint.clone())), + Type::Function(vec![primint.clone()], Box::new(primint.clone())), + Type::Function(vec![primchar.clone()], Box::new(primint.clone())), + ] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(primint.clone()) + )), + vec![ + Type::Function(vec![], Box::new(primint.clone())), + Type::Function(vec![primint.clone()], Box::new(primint.clone())), + Type::Function(vec![primchar.clone()], Box::new(primint.clone())), + Type::Function(vec![primstr.clone()], Box::new(primint.clone())), + Type::Function( + vec![primint.clone(), primchar.clone()], + Box::new(primint.clone()) + ), + Type::Function( + vec![primint.clone(), primstr.clone()], + Box::new(primint.clone()) + ), + Type::Function( + vec![primchar.clone(), primstr.clone()], + Box::new(primint.clone()) + ), + ] + ); + + assert_eq!( + simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function(vec![], Box::new(primint.clone()))), + )), + vec![Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(primint.clone()) + ),] + ); + assert_eq!( + 
simplify_type(&Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function( + vec![primint.clone(), primchar.clone()], + Box::new(primint.clone()) + )), + )), + vec![ + Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function(vec![], Box::new(primint.clone()))) + ), + Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function( + vec![primint.clone()], + Box::new(primint.clone()) + )) + ), + Type::Function( + vec![primint.clone(), primchar.clone(), primstr.clone()], + Box::new(Type::Function( + vec![primchar.clone()], + Box::new(primint.clone()) + )) + ), + ] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![ + Type::Function(vec![], Box::new(primint.clone())), + primstr.clone() + ], + Box::new(primint.clone()) + )), + vec![Type::Function( + vec![primint.clone(), primstr.clone()], + Box::new(primint.clone()) + )] + ); + assert_eq!( + simplify_type(&Type::Function( + vec![ + primint.clone(), + Type::Function(vec![], Box::new(primint.clone())) + ], + Box::new(primint.clone()) + )), + vec![Type::Function( + vec![primint.clone(), primint.clone()], + Box::new(primint.clone()) + )] + ); + + let applied = Type::Application(Box::new(primint.clone()), vec![]); + assert_eq!( + simplify_type(&Type::Application(Box::new(primint.clone()), vec![])), + vec![primint.clone()] + ); + assert_eq!(simplify_type(&applied), vec![primint.clone()]); + assert_eq!( + simplify_type(&Type::Application( + Box::new(applied.clone()), + vec![primint.clone()] + )), + vec![Type::Application( + Box::new(primint.clone()), + vec![primint.clone()] + )] + ); + assert_eq!( + simplify_type(&Type::Application( + Box::new(primint.clone()), + vec![applied.clone()] + )), + vec![Type::Application( + Box::new(primint.clone()), + vec![primint.clone()] + )] + ); + assert_eq!( + simplify_type(&Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), 
applied.clone(), primstr.clone()] + )), + vec![Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primint.clone(), primstr.clone()] + )] + ); + assert_eq!( + simplify_type(&Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primint.clone(), primstr.clone()] + )), + vec![ + Type::Application(Box::new(primint.clone()), vec![]), + Type::Application(Box::new(primint.clone()), vec![primchar.clone()]), + Type::Application(Box::new(primint.clone()), vec![primint.clone()]), + Type::Application(Box::new(primint.clone()), vec![primstr.clone()]), + Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primint.clone()] + ), + Type::Application( + Box::new(primint.clone()), + vec![primchar.clone(), primstr.clone()] + ), + Type::Application( + Box::new(primint.clone()), + vec![primint.clone(), primstr.clone()] + ) + ] + ); + } +} diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs new file mode 100644 index 0000000..64837b9 --- /dev/null +++ b/src/syntax/ast.rs @@ -0,0 +1,336 @@ +use crate::syntax::location::{Located, Location}; +use crate::syntax::name::Name; +use proptest_derive::Arbitrary; + +#[derive(Debug)] +pub struct Module { + pub definitions: Vec, +} + +#[derive(Debug)] +pub struct Definition { + pub location: Location, + pub export: ExportClass, + pub type_restrictions: TypeRestrictions, + pub definition: Def, +} + +impl Located for Definition { + fn location(&self) -> Location { + self.location.clone() + } +} + +#[derive(Debug)] +pub enum Def { + Enumeration(EnumerationDef), + Structure(StructureDef), + Function(FunctionDef), + Value(ValueDef), + Operator(OperatorDef), +} + +impl Located for Def { + fn location(&self) -> Location { + match self { + Def::Enumeration(def) => def.location.clone(), + Def::Structure(def) => def.location.clone(), + Def::Function(def) => def.location.clone(), + Def::Value(def) => def.location.clone(), + Def::Operator(def) => def.location.clone(), + } + } +} + +#[derive(Debug)] 
+pub struct EnumerationDef { + pub name: Name, + pub location: Location, + pub variants: Vec, +} + +#[derive(Debug)] +pub struct EnumerationVariant { + pub location: Location, + pub name: Name, + pub argument: Option, +} + +#[derive(Debug)] +pub struct StructureDef { + pub name: Name, + pub location: Location, + pub fields: Vec, +} + +#[derive(Debug)] +pub struct StructureField { + pub location: Location, + pub export: ExportClass, + pub name: Name, + pub field_type: Option, +} + +#[derive(Debug)] +pub struct FunctionDef { + pub name: Name, + pub location: Location, + pub arguments: Vec, + pub return_type: Option, + pub body: Vec, +} + +#[derive(Debug)] +pub struct FunctionArg { + pub name: Name, + pub arg_type: Option, +} + +#[derive(Debug)] +pub struct ValueDef { + pub name: Name, + pub location: Location, + pub mtype: Option, + pub value: Expression, +} + +#[derive(Debug)] +pub struct OperatorDef { + pub operator_name: Name, + pub location: Location, + pub function_name: Name, +} + +#[derive(Debug)] +pub enum ExportClass { + Public, + Private, +} + +#[derive(Debug)] +pub enum Statement { + Binding(BindingStmt), + Expression(Expression), +} + +#[derive(Debug)] +pub struct BindingStmt { + pub location: Location, + pub mutable: bool, + pub variable: Name, + pub value: Expression, +} + +#[derive(Debug)] +pub enum Expression { + Value(ConstantValue), + Reference(Location, Name), + Enumeration(EnumerationExpr), + Structure(StructureExpr), + Conditional(ConditionalExpr), + Match(MatchExpr), + Call(Box, CallKind, Vec), + Block(Location, Vec), +} + +impl Located for Expression { + fn location(&self) -> Location { + match self { + Expression::Value(c) => c.location(), + Expression::Reference(l, _) => l.clone(), + Expression::Enumeration(ev) => ev.location.clone(), + Expression::Structure(sv) => sv.location.clone(), + Expression::Conditional(ce) => ce.location.clone(), + Expression::Match(me) => me.location.clone(), + Expression::Call(_, _, _) => unimplemented!(), + 
Expression::Block(l, _) => l.clone(), + } + } +} + +#[derive(Debug)] +pub struct EnumerationExpr { + pub location: Location, + pub type_name: Name, + pub variant_name: Name, + pub argument: Option>, +} + +#[derive(Debug)] +pub struct StructureExpr { + pub location: Location, + pub type_name: Name, + pub fields: Vec, +} + +#[derive(Debug)] +pub struct ConditionalExpr { + pub location: Location, + pub test: Box, + pub consequent: Box, + pub alternative: Option>, +} + +#[derive(Debug)] +pub struct MatchExpr { + pub location: Location, + pub value: Box, + pub cases: Vec, +} + +#[derive(Debug)] +pub struct MatchCase { + pub pattern: Pattern, + pub consequent: Expression, +} + +#[derive(Debug)] +pub enum Pattern { + Constant(ConstantValue), + Variable(Name), + EnumerationValue(EnumerationPattern), + Structure(StructurePattern), +} + +#[derive(Debug)] +pub struct EnumerationPattern { + pub location: Location, + pub type_name: Name, + pub variant_name: Name, + pub argument: Option>, +} + +#[derive(Debug)] +pub struct StructurePattern { + pub location: Location, + pub type_name: Name, + pub fields: Vec<(Name, Option)>, +} + +#[derive(Debug)] +pub enum CallKind { + Infix, + Normal, + Postfix, + Prefix, +} + +#[derive(Debug)] +pub struct FieldValue { + pub field: Name, + pub value: Expression, +} + +#[derive(Debug)] +pub struct TypeRestrictions { + pub restrictions: Vec, +} + +impl TypeRestrictions { + pub fn empty() -> Self { + TypeRestrictions { + restrictions: vec![], + } + } + + pub fn is_empty(&self) -> bool { + self.restrictions.is_empty() + } +} + +#[derive(Debug)] +pub struct TypeRestriction { + pub constructor: Type, + pub arguments: Vec, +} + +#[derive(Clone, Debug)] +pub enum Type { + Constructor(Location, Name), + Variable(Location, Name), + Primitive(Location, Name), + Application(Box, Vec), + Function(Vec, Box), +} + +impl PartialEq for Type { + fn eq(&self, other: &Self) -> bool { + match self { + Type::Constructor(_, x) => matches!(other, Type::Constructor(_, 
y) if x == y), + Type::Variable(_, x) => matches!(other, Type::Variable(_, y) if x == y), + Type::Primitive(_, x) => matches!(other, Type::Primitive(_, y) if x == y), + Type::Application(con1, args1) => { + matches!(other, Type::Application(con2, args2) if con1 == con2 && args1 == args2) + } + Type::Function(args1, ret1) => { + matches!(other, Type::Function(args2, ret2) if args1 == args2 && ret1 == ret2) + } + } + } +} + +impl Located for Type { + fn location(&self) -> Location { + match self { + Type::Constructor(l, _) => l.clone(), + Type::Variable(l, _) => l.clone(), + Type::Primitive(l, _) => l.clone(), + Type::Application(t1, ts) => { + let mut result = t1.location(); + if let Some(last) = ts.last() { + result = result.extend_to(&last.location()); + } + result + } + Type::Function(args, ret) => { + if let Some(first) = args.first() { + first.location().extend_to(&ret.location()) + } else { + ret.location() + } + } + } + } +} + +#[derive(Clone, Debug)] +pub enum ConstantValue { + Integer(Location, IntegerWithBase), + Character(Location, char), + String(Location, String), +} + +impl Located for ConstantValue { + fn location(&self) -> Location { + match self { + ConstantValue::Integer(l, _) => l.clone(), + ConstantValue::Character(l, _) => l.clone(), + ConstantValue::String(l, _) => l.clone(), + } + } +} + +impl PartialEq for ConstantValue { + fn eq(&self, other: &Self) -> bool { + match self { + ConstantValue::Character(_, x) => { + matches!(other, ConstantValue::Character(_, y) if x == y) + } + ConstantValue::String(_, x) => matches!(other, ConstantValue::String(_, y) if x == y), + ConstantValue::Integer(_, x) => matches!(other, ConstantValue::Integer(_, y) if x == y), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)] +pub struct IntegerWithBase { + #[proptest(strategy = "proptest::prop_oneof![ \ + proptest::strategy::Just(None), \ + proptest::strategy::Just(Some(2)), \ + proptest::strategy::Just(Some(8)), \ + proptest::strategy::Just(Some(10)), 
\ + proptest::strategy::Just(Some(16)), \ + ]")] + pub base: Option, + pub value: u64, +} diff --git a/src/syntax/error.rs b/src/syntax/error.rs new file mode 100644 index 0000000..d71af16 --- /dev/null +++ b/src/syntax/error.rs @@ -0,0 +1,147 @@ +//use codespan_reporting::diagnostic::{Diagnostic, Label}; +use crate::syntax::tokens::Token; +use internment::ArcIntern; +use std::ops::Range; +use std::path::PathBuf; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ParserError { + #[error("Lexer error at {file}: {error}")] + LexerError { + file: ArcIntern, + error: LexerError, + }, + + #[error("Unacceptable end of file at {file} while {place}")] + UnacceptableEof { + file: ArcIntern, + place: String, + }, + + #[error("Unexpected token at {file}: expected {expected}, saw {token}")] + UnexpectedToken { + file: ArcIntern, + span: Range, + token: Token, + expected: String, + }, + + #[error("Unexpected problem opening file {file}: {error}")] + OpenError { file: String, error: std::io::Error }, + + #[error("Unexpected problem reading file {file}: {error}")] + ReadError { file: String, error: std::io::Error }, + + #[error("UTF-8 problem reading file {file}: {error}")] + Utf8Error { + file: String, + error: std::str::Utf8Error, + }, +} + +#[derive(Clone, Debug, Error, PartialEq)] +pub enum LexerError { + #[error("Illegal control character in input stream at offset {offset}")] + IllegalControlCharacter { offset: usize }, + + #[error("Illegal primitive value/type; it cut off before we could determine which at {span:?}")] + IllegalPrimitive { span: Range }, + + #[error("Illegal character in primitive ({char:?}) at {span:?}")] + IllegalPrimitiveCharacter { span: Range, char: char }, + + #[error("Unfinished character constant found at {span:?}")] + UnfinishedCharacter { span: Range }, + + #[error("Unfinished string constant found at {span:?}")] + UnfinishedString { span: Range }, + + #[error("Character {char:?} has some extra bits at the end at {span:?}")] + 
OverlongCharacter { char: char, span: Range }, + + #[error("Unknown escaped character {escaped_char:?} at {span:?}")] + UnknownEscapeCharacter { + escaped_char: char, + span: Range, + }, + + #[error("Invalid unicode escape sequence at {span:?}")] + InvalidUnicode { span: Range }, +} + +impl LexerError { + pub fn to_triple(&self) -> (usize, Result, usize) { + match self { + LexerError::IllegalControlCharacter { offset } => (*offset, Err(self.clone()), *offset), + LexerError::IllegalPrimitive { span } => (span.start, Err(self.clone()), span.end), + LexerError::IllegalPrimitiveCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::UnfinishedCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::UnfinishedString { span, .. } => (span.start, Err(self.clone()), span.end), + LexerError::OverlongCharacter { span, .. } => (span.start, Err(self.clone()), span.end), + LexerError::UnknownEscapeCharacter { span, .. } => { + (span.start, Err(self.clone()), span.end) + } + LexerError::InvalidUnicode { span, .. 
} => (span.start, Err(self.clone()), span.end), + } + } +} + +//impl From for Diagnostic { +// fn from(value: LexerError) -> Self { +// match value { +// LexerError::IllegalControlCharacter { file, offset } => Diagnostic::error() +// .with_code("E1001") +// .with_message("Illegal control character in input stream") +// .with_label(Label::primary(file, offset..offset).with_message("illegal character")), +// +// LexerError::IllegalPrimitive { file, span } => Diagnostic::error() +// .with_code("E1002") +// .with_message("Illegal primitive; it cut off before it could finish") +// .with_label( +// Label::primary(file, span) +// .with_message("should be at least one character after the %"), +// ), +// +// LexerError::IllegalPrimitiveCharacter { file, span, char } => Diagnostic::error() +// .with_code("E1003") +// .with_message(format!("Illegal character {char:?} in primitive")) +// .with_label(Label::primary(file, span).with_message("illegal character")), +// +// LexerError::UnfinishedCharacter { file, span } => Diagnostic::error() +// .with_code("E1004") +// .with_message("Unfinished character in input stream.") +// .with_label(Label::primary(file, span).with_message("unfinished character")), +// +// LexerError::UnfinishedString { file, span } => Diagnostic::error() +// .with_code("E1005") +// .with_message("Unfinished string in input stream.") +// .with_label(Label::primary(file, span).with_message("unfinished string")), +// +// LexerError::OverlongCharacter { file, char, span } => Diagnostic::error() +// .with_code("E1006") +// .with_message(format!( +// "Character {char:?} has some extra bits at the end of it." 
+// )) +// .with_label(Label::primary(file, span).with_message("overlong character")), +// +// LexerError::UnknownEscapeCharacter { +// file, +// escaped_char, +// span, +// } => Diagnostic::error() +// .with_code("E1007") +// .with_message(format!("Unknown escape character {escaped_char:?}.")) +// .with_label(Label::primary(file, span).with_message("unknown character")), +// +// LexerError::InvalidUnicode { file, span } => Diagnostic::error() +// .with_code("E1008") +// .with_message("Unknown or invalid unicode escape sequence.") +// .with_label(Label::primary(file, span).with_message("escape sequence")), +// } +// } +//} diff --git a/src/syntax/location.rs b/src/syntax/location.rs new file mode 100644 index 0000000..2bc59f0 --- /dev/null +++ b/src/syntax/location.rs @@ -0,0 +1,79 @@ +use ariadne::Span; +use internment::ArcIntern; +use std::cmp::{max, min}; +use std::ops::Range; +use std::path::PathBuf; + +pub trait Located { + fn location(&self) -> Location; +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Location { + file: ArcIntern, + span: Range, +} + +impl Span for Location { + type SourceId = ArcIntern; + + fn source(&self) -> &Self::SourceId { + &self.file + } + + fn start(&self) -> usize { + self.span.start + } + + fn end(&self) -> usize { + self.span.end + } +} + +impl Location { + pub fn new(file: &ArcIntern, span: Range) -> Self { + Location { + file: file.clone(), + span, + } + } + + pub fn manufactured() -> Self { + Location { + file: ArcIntern::new("".into()), + span: 0..0, + } + } + + pub fn extend_to(&self, other: &Location) -> Location { + assert_eq!(self.file, other.file); + Location { + file: self.file.clone(), + span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end), + } + } + + pub fn merge_span(mut self, span: Range) -> Location { + self.span = min(self.span.start, span.start)..max(self.span.end, span.end); + self + } +} + +#[test] +fn extension_and_merge() { + let file = 
ArcIntern::new("/foo/bar.txt".into()); + let loc1 = Location::new(&file, 1..4); + let loc2 = Location::new(&file, 4..8); + + assert_eq!(loc1.extend_to(&loc2).source(), &file); + assert_eq!(loc1.extend_to(&loc2).start(), 1); + assert_eq!(loc1.extend_to(&loc2).end(), 8); + + let loc3 = Location::new(&file, 12..16); + assert_eq!(loc1.extend_to(&loc3).source(), &file); + assert_eq!(loc1.extend_to(&loc3).start(), 1); + assert_eq!(loc1.extend_to(&loc3).end(), 16); + + assert_eq!(loc1.clone().merge_span(0..1).start(), 0); + assert_eq!(loc1.merge_span(0..1).end(), 4); +} diff --git a/src/syntax/name.rs b/src/syntax/name.rs new file mode 100644 index 0000000..ad7ef8c --- /dev/null +++ b/src/syntax/name.rs @@ -0,0 +1,156 @@ +use crate::syntax::Location; +#[cfg(test)] +use internment::ArcIntern; +use std::cmp; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::sync::atomic::{AtomicU64, Ordering}; + +static IDENTIFIER_COUNTER: AtomicU64 = AtomicU64::new(0); + +#[derive(Clone, Debug)] +pub struct Name { + printable: String, + identifier: u64, + location: Option, +} + +impl cmp::PartialEq for Name { + fn eq(&self, other: &Self) -> bool { + self.identifier == other.identifier + } +} + +impl cmp::Eq for Name {} + +impl Hash for Name { + fn hash(&self, state: &mut H) { + self.identifier.hash(state); + } +} + +impl fmt::Display for Name { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", self.printable, self.identifier) + } +} + +impl Name { + pub fn new(location: Location, s: S) -> Name { + let my_id = IDENTIFIER_COUNTER.fetch_add(1, Ordering::SeqCst); + Name { + printable: s.to_string(), + identifier: my_id, + location: Some(location), + } + } + + pub fn gensym(base: &'static str) -> Name { + let formatted = format!("<{base}>"); + let my_id = IDENTIFIER_COUNTER.fetch_add(1, Ordering::SeqCst); + + Name { + printable: formatted, + identifier: my_id, + location: None, + } + } + + pub fn as_printed(&self) -> &str { + self.printable.as_str() + } + 
+ pub fn bind_to(&mut self, other: &Name) { + self.identifier = other.identifier; + } + + pub fn location(&self) -> Option<&Location> { + self.location.as_ref() + } +} + +#[test] +fn equality() { + let file = ArcIntern::new("/foo.bang".into()); + let loc1 = Location::new(&file, 0..3); + let loc2 = Location::new(&file, 9..12); + + assert_ne!(Name::gensym("x"), Name::gensym("x")); + assert_ne!(Name::new(loc1.clone(), "x"), Name::new(loc1.clone(), "x")); + assert_eq!( + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc1.clone()) + }, + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc2.clone()) + } + ); + assert_eq!( + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc1.clone()) + }, + Name { + printable: "x".into(), + identifier: 5, + location: None + } + ); + assert_eq!( + Name { + printable: "x".into(), + identifier: 5, + location: Some(loc1.clone()) + }, + Name { + printable: "y".into(), + identifier: 5, + location: None + } + ); +} + +#[test] +fn hashing() { + let file = ArcIntern::new("/foo.bang".into()); + let loc1 = Location::new(&file, 0..3); + let loc2 = Location::new(&file, 9..12); + + let x1 = Name { + printable: "x".into(), + identifier: 1, + location: Some(loc1), + }; + let mut x2 = Name { + printable: "x".into(), + identifier: 2, + location: Some(loc2), + }; + let y1 = Name { + printable: "y".into(), + identifier: 1, + location: None, + }; + + let run_hash = |name: &Name| { + let mut hash = std::hash::DefaultHasher::new(); + name.hash(&mut hash); + hash.finish() + }; + + let hash_x1 = run_hash(&x1); + let hash_x2 = run_hash(&x2); + let hash_y1 = run_hash(&y1); + + assert_ne!(hash_x1, hash_x2); + assert_eq!(hash_x1, hash_y1); + + x2.bind_to(&x1); + let rehashed_x2 = run_hash(&x2); + assert_eq!(hash_x1, rehashed_x2); +} diff --git a/src/syntax/parse.rs b/src/syntax/parse.rs new file mode 100644 index 0000000..5025c5e --- /dev/null +++ b/src/syntax/parse.rs @@ -0,0 +1,1785 @@ +use 
crate::syntax::error::ParserError; +use crate::syntax::tokens::{Lexer, LocatedToken, Token}; +use crate::syntax::*; +use internment::ArcIntern; +use std::collections::HashMap; +use std::ops::Range; +use std::path::{Path, PathBuf}; + +/// A parser for a particular file. +/// +/// This parser should be used for exactly one file, and its lifetime +/// must be tied to the underlying lexer. However, after the parser is +/// done, the resultant object should have no lifetime links to the +/// original file, so it can be thrown away. +/// +/// The parser includes information about operator precedence that is +/// stateful. +pub struct Parser<'lexer> { + file: ArcIntern, + lexer: Lexer<'lexer>, + known_tokens: Vec, + prefix_precedence_table: HashMap, + infix_precedence_table: HashMap, + postfix_precedence_table: HashMap, +} + +/// The directional associativity for an operator. +/// +/// This directionality impacts whether (a + b + c) defaults to +/// ((a + b) + c) or (a + (b + c)). It does not effect situations +/// in which operator numeric precedence is different between +/// operators. +pub enum Associativity { + Left, + Right, + None, +} + +/// The kind of operators we use. This is only narrowly useful inside +/// this particular crate. +enum OperatorType { + Prefix, + Infix, + Postfix, +} + +impl<'lexer> Parser<'lexer> { + /// Create a new parser from the given file index and lexer. + /// + /// The file index will be used for annotating locations and for + /// error messages. If you don't care about either, you can use + /// 0 with no loss of functionality. (Obviously, it will be harder + /// to create quality error messages, but you already knew that.) 
+ pub fn new>(file: P, lexer: Lexer<'lexer>) -> Parser<'lexer> { + Parser { + file: ArcIntern::new(file.as_ref().to_path_buf()), + lexer, + known_tokens: vec![], + prefix_precedence_table: HashMap::new(), + infix_precedence_table: HashMap::new(), + postfix_precedence_table: HashMap::new(), + } + } + + /// Add the given operator to our precedence table, at the given + /// precedence level and associativity. + /// + /// This is used for infix operators, only. + pub fn add_infix_precedence( + &mut self, + operator: S, + associativity: Associativity, + level: u8, + ) { + let actual_associativity = match associativity { + Associativity::Left => (level * 2, (level * 2) + 1), + Associativity::Right => ((level * 2) + 1, level * 2), + Associativity::None => (level * 2, level * 2), + }; + + self.infix_precedence_table + .insert(operator.to_string(), actual_associativity); + } + + /// Add the given operator to our precedence table, at the given + /// precedence level and associativity. + /// + /// This is used for prefix operators, only. + pub fn add_prefix_precedence(&mut self, operator: S, level: u8) { + self.prefix_precedence_table + .insert(operator.to_string(), level * 2); + } + + /// Add the given operator to our precedence table, at the given + /// precedence level and associativity. + /// + /// This is used for postfix operators, only. + pub fn add_postfix_precedence(&mut self, operator: S, level: u8) { + self.postfix_precedence_table + .insert(operator.to_string(), level * 2); + } + + /// Get the precedence of the given operator. + /// + /// FIXME: This currently only functions on infix operators, not + /// prefix and postfix. In general, this can all be cleaned up. + fn get_precedence(&self, name: &String) -> (u8, u8) { + match self.infix_precedence_table.get(name) { + None => (19, 20), + Some(x) => *x, + } + } + + /// Get the next token from the input stream, or None if we're at + /// the end of a stream. 
+ /// + /// Ok(None) represents "we have reached the end of the stream", while + /// an Err(_) means that we ran into some sort of error (UTF-8 formatting, + /// lexing, IO, etc.) in reading the stream. + pub fn next(&mut self) -> Result, ParserError> { + let result = self.known_tokens.pop(); + + if result.is_some() { + Ok(result) + } else { + self.lexer + .next() + .transpose() + .map_err(|error| ParserError::LexerError { + file: self.file.clone(), + error, + }) + } + } + + /// Save the given token back to the top of the stream. + /// + /// This is essentially an "undo" on next(), or an alternative path for + /// peeking at the next token in the stream. + fn save(&mut self, token: LocatedToken) { + self.known_tokens.push(token) + } + + /// Get the location of the next token in the stream. + /// + /// This will return an error if we're at the end of the file. + fn current_location(&mut self) -> Result { + let current = self.next()?; + match current { + None => Err(self.bad_eof("trying to get current location")), + Some(token) => { + let retval = self.to_location(token.span.clone()); + self.save(token); + Ok(retval) + } + } + } + + /// Generate the parser error that should happen when we hit an EOF + /// in a bad place. + fn bad_eof(&mut self, place: S) -> ParserError { + ParserError::UnacceptableEof { + file: self.file.clone(), + place: place.to_string(), + } + } + + /// Convert an offset into a formal location that can be saved off + /// into ASTs. + fn to_location(&self, span: Range) -> Location { + Location::new(&self.file, span) + } + + /// See if the next token is the keyword, as expected. + /// + /// If it isn't, this routine will provide an error, but it will make + /// sure to put the token back into the stream. + fn require_keyword(&mut self, keyword: &'static str) -> Result { + match self.next()? 
{ + None => Err(self.bad_eof(format!("looking for keyword '{keyword}'"))), + Some(ltoken) => match ltoken.token { + Token::ValueName(s) if s.as_str() == keyword => Ok(self.to_location(ltoken.span)), + _ => { + self.save(ltoken.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: ltoken.span, + token: ltoken.token, + expected: format!("keyword {keyword}"), + }) + } + }, + } + } + + /// See if the next token is an operator, as expected. + /// + /// If it isn't, this routine will provide an error, but it will make + /// sure to put the token back into the stream. + fn require_operator(&mut self, op: &'static str) -> Result { + match self.next()? { + None => Err(self.bad_eof(format!("looking for symbol '{op}'"))), + Some(ltoken) => match ltoken.token { + Token::OperatorName(s) if s.as_str() == op => Ok(self.to_location(ltoken.span)), + _ => { + self.save(ltoken.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: ltoken.span, + token: ltoken.token, + expected: format!("symbol {op}"), + }) + } + }, + } + } + + /// See if the next token is the given one, as expected. + /// + /// If it isn't, this routine will provide an error, but it will make + /// sure to put the token back into the stream. + fn require_token( + &mut self, + token: Token, + place: &'static str, + ) -> Result { + let message = || format!("looking for '{token}' in {place}"); + let next = self.next()?.ok_or_else(|| self.bad_eof(message()))?; + + if next.token != token { + self.save(next.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: message(), + }) + } else { + Ok(self.to_location(next.span)) + } + } + + /// Parse the top level file associated with a Bang module. + /// + /// This will expect to read until EOF, and will fail or stall + /// forever if there is no EOF, or the EOF ends in the wrong + /// place. 
So this should *not* be used for interactive sessions,
    /// because those are unlikely to have EOFs in the appropriate
    /// places.
    pub fn parse_module(&mut self) -> Result<Module, ParserError> {
        let mut definitions = vec![];

        // Keep consuming top-level definitions until the token stream runs dry.
        while let Some(token) = self.next()? {
            self.save(token);
            definitions.push(self.parse_definition()?);
        }

        Ok(Module { definitions })
    }

    /// Debugging aid: print the next token without consuming it.
    #[allow(unused)]
    #[cfg(not(coverage))]
    fn print_next_token(&mut self, comment: &str) {
        let token = self.next().expect("can get token");
        println!(
            "[{comment}] next token will be {:?}",
            token.as_ref().map(|located| located.token.clone())
        );
        if let Some(token) = token {
            self.save(token);
        }
    }

    /// Parse a definition in a file (structure, enumeration, value, etc.).
    ///
    /// This will read a definition. If there's an error, it's very likely the
    /// input stream will be corrupted, so you probably don't want to try to
    /// recover. You can, obviously.
    pub fn parse_definition(&mut self) -> Result<Definition, ParserError> {
        let (export, start_location) = self.parse_export_class()?;
        let type_restrictions = self.parse_type_restrictions()?;
        let definition = self.parse_def()?;

        Ok(Definition {
            // Stretch the location back to cover the export clause, if any.
            location: definition.location().extend_to(&start_location),
            export,
            type_restrictions,
            definition,
        })
    }

    /// Parse the export class for the current definition.
    ///
    /// If there isn't an 'export' declaration, then this will return 'private',
    /// because if it hasn't been declared exported then it's private. But this
    /// does mean that a future parsing error will be assumed to be a private
    /// declaration.
    fn parse_export_class(&mut self) -> Result<(ExportClass, Location), ParserError> {
        match self.require_keyword("export") {
            Ok(span) => Ok((ExportClass::Public, span)),
            // No 'export' keyword: default to private, anchored wherever the
            // definition itself begins.
            Err(_) => Ok((ExportClass::Private, self.current_location()?)),
        }
    }

    /// Parse a type restriction and return it.
+ /// + /// Like the export class parsing, parsing type restrictions has a clear + /// default (no restrictions) when the input doesn't lead with the appropriate + /// keyword. As a result, this can generate a result even in cases in which + /// the input is empty. + pub fn parse_type_restrictions(&mut self) -> Result { + if self.require_keyword("restrict").is_err() { + return Ok(TypeRestrictions::empty()); + } + let _ = self.require_token(Token::OpenParen, "type restriction")?; + + let mut restrictions = vec![]; + + while let Some(type_restriction) = self.parse_type_restriction()? { + restrictions.push(type_restriction); + } + + let _ = self.require_token(Token::CloseParen, "type restriction")?; + Ok(TypeRestrictions { restrictions }) + } + + /// Parse a single type retriction. + /// + /// A type restriction should consist of a constructor token followed by + /// some number of arguments. We parse this in the obvious way, stopping + /// the input when we hit something that isn't a base type. + /// + /// Note that, because of this, we might end up in a situation in which + /// we throw an error after consuming a bunch of input, meaning that it + /// will be impossible to recover. + fn parse_type_restriction(&mut self) -> Result, ParserError> { + let maybe_constructor = self + .next()? 
+ .ok_or_else(|| self.bad_eof("Looking for constructor for type restriction"))?; + + let constructor = match maybe_constructor.token { + Token::TypeName(str) => { + let name = Name::new(self.to_location(maybe_constructor.span.clone()), str); + Type::Constructor(self.to_location(maybe_constructor.span), name) + } + Token::PrimitiveTypeName(str) => { + let name = Name::new(self.to_location(maybe_constructor.span.clone()), str); + Type::Primitive(self.to_location(maybe_constructor.span), name) + } + + token @ Token::CloseParen | token @ Token::Comma => { + self.save(LocatedToken { + token, + span: maybe_constructor.span, + }); + return Ok(None); + } + + weird => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_constructor.span, + token: weird, + expected: "Constructor name, comma, or close parenthesis in type restriction" + .into(), + }); + } + }; + + let mut arguments = vec![]; + + while let Ok(t) = self.parse_base_type() { + arguments.push(t); + } + + let restriction = TypeRestriction { + constructor, + arguments, + }; + + let _ = self.require_token(Token::Comma, ""); + + Ok(Some(restriction)) + } + + /// Parse a definition. + /// + /// A definition can include a structure definition, the definition of an enumeration, + /// the declaration of some sort of operator, or a value definition. (This statement + /// assumes that you consider a function a value, which is reasonable.) + /// + /// If this returns an error, you should not presume that you can recover from it. + fn parse_def(&mut self) -> Result { + let next = self + .next()? 
+            .ok_or_else(|| self.bad_eof("looking for definition body"))?;
+
+        match next.token {
+            Token::ValueName(ref x) if x == "structure" => {
+                self.save(next);
+                Ok(Def::Structure(self.parse_structure()?))
+            }
+
+            Token::ValueName(ref x) if x == "enumeration" => {
+                self.save(next);
+                Ok(Def::Enumeration(self.parse_enumeration()?))
+            }
+
+            Token::ValueName(ref x)
+                if x == "operator" || x == "prefix" || x == "infix" || x == "postfix" =>
+            {
+                self.save(next);
+                Ok(Def::Operator(self.parse_operator()?))
+            }
+
+            Token::ValueName(_) => {
+                self.save(next);
+                self.parse_function_or_value()
+            }
+
+            _ => Err(ParserError::UnexpectedToken {
+                file: self.file.clone(),
+                span: next.span,
+                token: next.token,
+                expected: "'structure', 'enumeration', 'operator', or a value identifier".into(),
+            }),
+        }
+    }
+
+    /// Parse a structure definition.
+    ///
+    /// Structure definitions should start with the keyword "structure". If they
+    /// don't, this will return, but it will do so in a way that is recoverable.
+    /// Otherwise, we'll start eating tokens and who knows what state we'll end
+    /// in.
+    pub fn parse_structure(&mut self) -> Result<StructureDef, ParserError> {
+        let start_location = self.require_keyword("structure")?;
+
+        let structure_name = self.parse_type_name("structure definition")?;
+        self.require_token(Token::OpenBrace, "after a structure name")?;
+
+        let mut fields = vec![];
+        while let Some(field_definition) = self.parse_field_definition()? {
+            fields.push(field_definition);
+        }
+
+        let brace =
+            self.require_token(Token::CloseBrace, "at the end of a structure definition")?;
+
+        let location = start_location.extend_to(&brace);
+
+        Ok(StructureDef {
+            name: structure_name,
+            location,
+            fields,
+        })
+    }
+
+    /// Parse a name and field value for a field inside a structure constructor.
+    ///
+    /// In this case, what we mean is the full "foo: bar" syntax that goes inside a structure
+    /// expression to declare a value.
+ pub fn parse_field_value(&mut self) -> Result, ParserError> { + let Ok(field) = self.parse_name("structure value") else { + return Ok(None); + }; + self.require_token(Token::Colon, "after a field name")?; + let value = self.parse_expression()?; + + if let Some(end_token) = self.next()? + && !matches!(end_token.token, Token::Comma) + { + self.save(end_token); + } + + Ok(Some(FieldValue { field, value })) + } + + /// Parse a name and field definition for a field inside a structure definition. + /// + /// In this case, what we mean is the full "foo: Bar" syntax that goes inside a + /// structure type definition. Note, though, that we allow the ": Bar" to be + /// elided in the case that the user wants to try to infer the type. In addition, + /// recall that structure types can declare their individual fields public or + /// not, so that information gets parsed as well. + pub fn parse_field_definition(&mut self) -> Result, ParserError> { + let (export, start_location) = self.parse_export_class()?; + let Ok(name) = self.parse_name("field definition") else { + return Ok(None); + }; + + let maybe_colon = self.next()?.ok_or_else(|| { + self.bad_eof("looking for colon, comma, or close brace after field name") + })?; + + let field_type = match maybe_colon.token { + Token::Comma | Token::CloseBrace => { + self.save(maybe_colon); + None + } + + Token::Colon => Some(self.parse_type()?), + + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_colon.span, + token: maybe_colon.token, + expected: "colon, comma, or close brace after field name".into(), + }); + } + }; + + let end_token = self.next()?.ok_or_else(|| { + self.bad_eof("looking for comma or close brace after field definition") + })?; + + let maybe_end_location = match end_token.token { + Token::Comma => Some(self.to_location(end_token.span)), + Token::CloseBrace => { + self.save(end_token); + None + } + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), 
+                    span: end_token.span,
+                    token: end_token.token,
+                    expected: "looking for comma or close brace after field definition".into(),
+                });
+            }
+        };
+
+        let end_location = maybe_end_location
+            .or_else(|| field_type.as_ref().map(|x| x.location()))
+            .unwrap_or_else(|| name.location().unwrap().clone());
+        let location = start_location.extend_to(&end_location);
+
+        Ok(Some(StructureField {
+            location,
+            export,
+            name,
+            field_type,
+        }))
+    }
+
+    /// Parse an enumeration declaration from the input stream.
+    ///
+    /// As with structures, this will cleanly abort if the first token is wrong,
+    /// but if it makes it past that token, all bets are off.
+    pub fn parse_enumeration(&mut self) -> Result<EnumerationDef, ParserError> {
+        let start_location = self.require_keyword("enumeration")?;
+        let enumeration_name = self.parse_type_name("enumeration definition")?;
+
+        self.require_token(Token::OpenBrace, "after enumeration name")?;
+
+        let mut variants = vec![];
+        while let Some(variant_definition) = self.parse_enum_variant()? {
+            variants.push(variant_definition);
+        }
+
+        let brace = self.require_token(Token::CloseBrace, "after enumeration options")?;
+
+        let location = start_location.extend_to(&brace);
+
+        Ok(EnumerationDef {
+            name: enumeration_name,
+            location,
+            variants,
+        })
+    }
+
+    /// Parse a variant of an enumeration in the enumeration definition.
+    ///
+    /// At this point in bang's lifecycle, enumerations can have zero or one arguments,
+    /// but no more, which simplified parsing a trace.
+    pub fn parse_enum_variant(&mut self) -> Result<Option<EnumerationVariant>, ParserError> {
+        let Ok(name) = self.parse_type_name("variant definition") else {
+            return Ok(None);
+        };
+        let start_location = name.location().unwrap().clone();
+
+        let maybe_paren = self
+            .next()?
+            .ok_or_else(|| self.bad_eof("trying to understand enumeration variant"))?;
+        // An open parenthesis means this variant carries a single type argument.
+        let (argument, arg_location) = if matches!(maybe_paren.token, Token::OpenParen) {
+            let t = self.parse_type()?;
+            self.require_token(Token::CloseParen, "variant's type argument")?;
+            let location = t.location();
+            (Some(t), location)
+        } else {
+            self.save(maybe_paren);
+            (None, start_location.clone())
+        };
+
+        let ender = self.next()?.ok_or_else(|| {
+            self.bad_eof("looking for comma or close brace after enumeration variant")
+        })?;
+        let end_location = match ender.token {
+            Token::Comma => self.to_location(ender.span),
+            Token::CloseBrace => {
+                self.save(ender);
+                arg_location
+            }
+            _ => {
+                // Push the unexpected token back before erroring; the error also
+                // reports it, so it's cloned rather than moved into save().
+                self.save(ender.clone());
+                return Err(ParserError::UnexpectedToken {
+                    file: self.file.clone(),
+                    span: ender.span,
+                    token: ender.token,
+                    expected: "comma or close brace after enumeration variant".into(),
+                });
+            }
+        };
+
+        let location = start_location.extend_to(&end_location);
+
+        Ok(Some(EnumerationVariant {
+            name,
+            location,
+            argument,
+        }))
+    }
+
+    /// Parse an operator declaration.
+    ///
+    /// Operator declarations are the only thing where we immediately modify the state
+    /// of the parser, allowing the operator to be used immediately after it is declared.
+    /// Note that by "declare", we mean that the operator is given a variable that it maps
+    /// to; that variable can be declared further on in the file or even in another module,
+    /// as we won't try to resolve it until later.
+    ///
+    /// Like most definitions, we'll abort cleanly if the first token isn't "operator",
+    /// "infix", "postfix", or "prefix" keywords, but all bets are off after that.
+ pub fn parse_operator(&mut self) -> Result { + let (start, operator_type, associativity) = { + let mut optype = OperatorType::Infix; + let mut start = None; + let mut assoc = Associativity::None; + + if let Ok(loc) = self.require_keyword("prefix") { + optype = OperatorType::Prefix; + start = Some(loc); + } else if let Ok(loc) = self.require_keyword("postfix") { + optype = OperatorType::Postfix; + start = Some(loc); + } else if let Ok(loc) = self.require_keyword("infix") { + start = Some(loc); + + if self.require_keyword("right").is_ok() { + assoc = Associativity::Right; + } else if self.require_keyword("left").is_ok() { + assoc = Associativity::Left; + } + } + + let oploc = self.require_keyword("operator")?; + (start.unwrap_or(oploc), optype, assoc) + }; + let operator_name = self.parse_operator_name("operator definition")?; + + let level = if self.require_keyword("at").is_ok() { + let next = self + .next()? + .ok_or_else(|| self.bad_eof("precedence value in operator definition"))?; + + match next.token { + Token::Integer(int_with_base) if int_with_base.value < 10 => { + int_with_base.value as u8 + } + + Token::Integer(ref int_with_base) => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token.clone(), + expected: format!( + "number defining operator precedence ({} is too large", + int_with_base.value + ), + }); + } + + _ => { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "number defining operator precedence".into(), + }); + } + } + } else { + 5 + }; + + let _ = self.require_token(Token::Arrow, "operator definition")?; + + let function_name = self.parse_name("operator function definition")?; + let end = self.require_token(Token::Semi, "end of operator definition")?; + + match operator_type { + OperatorType::Infix => { + self.add_infix_precedence(operator_name.as_printed(), associativity, level) + } + OperatorType::Prefix => 
self.add_prefix_precedence(operator_name.as_printed(), level), + OperatorType::Postfix => self.add_postfix_precedence(operator_name.as_printed(), level), + } + + Ok(OperatorDef { + location: start.extend_to(&end), + operator_name, + function_name, + }) + } + + /// Parse a function or a value. + /// + /// Technically speaking, functions are values, so the name can feel a little silly. + /// However, we have some nice syntax for functions that avoids the need to put lambdas + /// everywhere, and so we sort of treat them differently. + fn parse_function_or_value(&mut self) -> Result { + let name = self.parse_name("function or value definition")?; + let start = name.location().unwrap().clone(); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("type or value for definition"))?; + + match next.token { + // If we see an open parenthesis next, we're looking at a nicely-formatted + // function definition, such as: + // + // factorial(x: Int) : Int { + // match x { + // 1 => 1, + // x => x * fact(x - 1), + // } + // } + // + // Or any of many variations of that. + Token::OpenParen => { + self.save(next); + let arguments = self.parse_function_def_arguments()?; + let mut return_type = None; + + if self.require_token(Token::Colon, "return type").is_ok() { + return_type = Some(self.parse_type()?); + } + + let Expression::Block(end, body) = self.parse_block()? else { + panic!("parse_block returned something that wasn't a block."); + }; + + Ok(Def::Function(FunctionDef { + name, + location: start.extend_to(&end), + arguments, + return_type, + body, + })) + } + + // If we see a colon, then someone's giving us a type for what is probably + // some form of simple constant, such as: + // + // foo : Int = 4; + // + // But honestly, there's a lot of odd possibilities of complicated things + // they could write there. 
+ Token::Colon => { + let value_type = self.parse_type()?; + let _ = self.require_operator("=")?; + let value = self.parse_expression()?; + let end = self.require_token(Token::Semi, "at end of definition")?; + + Ok(Def::Value(ValueDef { + name, + location: start.extend_to(&end), + mtype: Some(value_type), + value, + })) + } + + // If we see an equal sign, we're jumping right to the value part of the + // definition, and we're doing something like this: + // + // foo = 4; + // + // Again, though, you could write all sorts of interesting things after + // that. + Token::OperatorName(eq) if eq == "=" => { + let value = self.parse_expression()?; + let end = self.require_token(Token::Semi, "at end of definition")?; + + Ok(Def::Value(ValueDef { + name, + location: start.extend_to(&end), + mtype: None, + value, + })) + } + + // Those should be the only cases, so if we get here, something weird + // is going on. + _ => Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "open parenthesis, colon, or equals after variable in definition".into(), + }), + } + } + + /// Parse the arguments to a function declaration. + /// + /// Function arguments should have types, but don't have to. This function assumes + /// that it's starting at the opening parenthesis, and will error (cleanly) if it + /// isn't. + fn parse_function_def_arguments(&mut self) -> Result, ParserError> { + let _ = self.require_token(Token::OpenParen, "start of function argument definition")?; + let mut result = vec![]; + let mut just_skipped_comma = false; + + loop { + let next = self + .next()? 
+                .ok_or_else(|| self.bad_eof("parsing function arguments"))?;
+
+            if matches!(next.token, Token::CloseParen) {
+                break;
+            }
+
+            if matches!(next.token, Token::Comma) {
+                // Two commas in a row is an error; one is just a separator.
+                if just_skipped_comma {
+                    return Err(ParserError::UnexpectedToken {
+                        file: self.file.clone(),
+                        span: next.span,
+                        token: next.token,
+                        expected: "after another comma in function arguments".into(),
+                    });
+                }
+
+                just_skipped_comma = true;
+                continue;
+            }
+
+            self.save(next);
+            just_skipped_comma = false;
+            let name = self.parse_name("function argument name")?;
+            let mut arg_type = None;
+
+            if self.require_token(Token::Colon, "").is_ok() {
+                arg_type = Some(self.parse_type()?);
+            }
+
+            result.push(FunctionArg { name, arg_type });
+        }
+
+        Ok(result)
+    }
+
+    /// Parse a single expression out of the input stream.
+    ///
+    /// Because expressions can start with so many possible tokens, it's very
+    /// likely that if you call this, the input stream will be corrupted by any
+    /// errors this function returns. So you should be careful to only call it
+    /// in situations that don't require rollback.
+    pub fn parse_expression(&mut self) -> Result<Expression, ParserError> {
+        let next = self
+            .next()?
+            .ok_or_else(|| self.bad_eof("looking for an expression"))?;
+
+        self.save(next.clone());
+        match next.token {
+            Token::ValueName(x) if x == "match" => {
+                Ok(Expression::Match(self.parse_match_expression()?))
+            }
+            Token::ValueName(x) if x == "if" => {
+                Ok(Expression::Conditional(self.parse_if_expression()?))
+            }
+            _ => self.parse_arithmetic(0),
+        }
+    }
+
+    /// Parse a match expression.
+    ///
+    /// This function does assume that the next token in the input stream will
+    /// be the "match" keyword, and will error immediately (albeit, saving the
+    /// stream) if it isn't. So you *can* use this if you're not sure this is
+    /// a match expression, and want to escape if it isn't.
+    fn parse_match_expression(&mut self) -> Result<MatchExpr, ParserError> {
+        let start = self.require_keyword("match")?;
+        let value = Box::new(self.parse_arithmetic(0)?);
+        self.require_token(Token::OpenBrace, "start of a match case list")?;
+
+        let mut cases = vec![];
+        while let Some(case) = self.parse_match_case()? {
+            cases.push(case);
+        }
+
+        let end = self.require_token(Token::CloseBrace, "end of a match case list")?;
+        Ok(MatchExpr {
+            location: start.extend_to(&end),
+            value,
+            cases,
+        })
+    }
+
+    /// Parse a single match case.
+    ///
+    /// A match case consists of a pattern, a double-arrow, and then an expression
+    /// describing what to do if that pattern matches the expression. It may or may
+    /// not conclude with a comma.
+    fn parse_match_case(&mut self) -> Result<Option<MatchCase>, ParserError> {
+        // skip over anything we can just skip
+        loop {
+            let peeked = self
+                .next()?
+                .ok_or_else(|| self.bad_eof("looking for match case"))?;
+
+            if matches!(peeked.token, Token::Comma) {
+                continue;
+            }
+
+            let stop = matches!(peeked.token, Token::CloseBrace);
+
+            self.save(peeked);
+            if stop {
+                return Ok(None);
+            }
+
+            break;
+        }
+
+        let pattern = self.parse_pattern()?;
+        self.require_token(Token::Arrow, "after pattern in match clause")?;
+
+        let consequent = self.parse_expression()?;
+
+        Ok(Some(MatchCase {
+            pattern,
+            consequent,
+        }))
+    }
+
+    /// Parse a pattern from the input stream.
+    ///
+    /// Patterns are a recursive, complex structure without a clear opening token.
+    /// So ... you better be sure that you want a pattern when you call this,
+    /// because you're almost certainly not going to be able to recover and try
+    /// something else if this breaks.
+    pub fn parse_pattern(&mut self) -> Result<Pattern, ParserError> {
+        if let Ok(constant) = self.parse_constant() {
+            return Ok(Pattern::Constant(constant));
+        }
+
+        let next = self
+            .next()?
+            .ok_or_else(|| self.bad_eof("looking for a pattern to match"))?;
+
+        match next.token {
+            Token::ValueName(x) => {
+                let name = Name::new(self.to_location(next.span), x);
+                Ok(Pattern::Variable(name))
+            }
+
+            Token::TypeName(x) => {
+                let type_name = Name::new(self.to_location(next.span.clone()), x);
+                let start = self.to_location(next.span);
+
+                let next = self
+                    .next()?
+                    .ok_or_else(|| self.bad_eof("looking for a pattern to match"))?;
+                match next.token {
+                    Token::OpenBrace => {
+                        let mut fields = vec![];
+
+                        while let Some(field_pattern) = self.parse_field_pattern()? {
+                            fields.push(field_pattern)
+                        }
+
+                        let end =
+                            self.require_token(Token::CloseBrace, "after structure pattern")?;
+                        let structure_pattern = StructurePattern {
+                            location: start.extend_to(&end),
+                            type_name,
+                            fields,
+                        };
+
+                        Ok(Pattern::Structure(structure_pattern))
+                    }
+
+                    Token::DoubleColon => {
+                        let variant_name =
+                            self.parse_type_name("enumeration pattern variant name")?;
+
+                        let mut final_location = variant_name.location().unwrap().clone();
+
+                        let argument = if let Some(maybe_paren) = self.next()?
+                        {
+                            if matches!(maybe_paren.token, Token::OpenParen) {
+                                let sub_pattern = self.parse_pattern()?;
+                                final_location = self.require_token(
+                                    Token::CloseParen,
+                                    "after enumeration pattern argument",
+                                )?;
+
+                                Some(Box::new(sub_pattern))
+                            } else {
+                                self.save(maybe_paren);
+                                None
+                            }
+                        } else {
+                            None
+                        };
+
+                        let location = start.extend_to(&final_location);
+
+                        let pattern = EnumerationPattern {
+                            location,
+                            type_name,
+                            variant_name,
+                            argument,
+                        };
+
+                        Ok(Pattern::EnumerationValue(pattern))
+                    }
+
+                    _ => Err(ParserError::UnexpectedToken {
+                        file: self.file.clone(),
+                        span: next.span,
+                        token: next.token,
+                        expected: "An '::' or '{' after a type name in a pattern".into(),
+                    }),
+                }
+            }
+
+            _ => Err(ParserError::UnexpectedToken {
+                file: self.file.clone(),
+                span: next.span,
+                token: next.token,
+                expected: "The start of a pattern: a variable name or type name".into(),
+            }),
+        }
+    }
+
+    /// Parse a field pattern.
+    ///
+    /// For reference, a field pattern is either just the name of a field, or a name of a
+    /// field plus a colon and some form of subpattern. This can be used to either rename
+    /// a field or to only match when a field has a particular value.
+    ///
+    /// Regardless, this should start with a name, and if it doesn't start with a name,
+    /// we'll return Ok(None) to indicate that we're done parsing field patterns. If we
+    /// do get a name and then reach some sort of error, though, who knows what state we'll
+    /// end up in.
+    fn parse_field_pattern(&mut self) -> Result<Option<(Name, Option<Pattern>)>, ParserError> {
+        let next = self
+            .next()?
+            .ok_or_else(|| self.bad_eof("looking for structure pattern field name"))?;
+        let name = match next.token {
+            Token::CloseBrace => {
+                self.save(next);
+                return Ok(None);
+            }
+
+            Token::ValueName(s) => Name::new(self.to_location(next.span), s),
+
+            _ => {
+                return Err(ParserError::UnexpectedToken {
+                    file: self.file.clone(),
+                    span: next.span,
+                    token: next.token,
+                    expected: "a field name in a structure pattern".into(),
+                });
+            }
+        };
+
+        let next = self.next()?.ok_or_else(|| {
+            self.bad_eof("looking for colon, comma, or brace after structure field name in pattern")
+        })?;
+        let sub_pattern = match next.token {
+            Token::Comma => None,
+
+            Token::CloseBrace => {
+                self.save(next);
+                None
+            }
+
+            Token::Colon => {
+                let subpattern = self.parse_pattern()?;
+                let next = self.next()?.ok_or_else(|| {
+                    self.bad_eof("looking for comma or close brace after structure field")
+                })?;
+
+                match next.token {
+                    Token::Comma => {}
+                    Token::CloseBrace => self.save(next),
+                    _ => {
+                        return Err(ParserError::UnexpectedToken {
+                            file: self.file.clone(),
+                            span: next.span,
+                            token: next.token,
+                            expected: "comma or close brace after structure field".into(),
+                        });
+                    }
+                }
+
+                Some(subpattern)
+            }
+
+            _ => {
+                return Err(ParserError::UnexpectedToken {
+                    file: self.file.clone(),
+                    span: next.span,
+                    token: next.token,
+                    expected: "colon, comma, or brace after structure field name in pattern".into(),
+                });
+            }
+        };
+
+        Ok(Some((name, sub_pattern)))
+    }
+
+    /// Parse an if expression.
+    ///
+    /// Like many of these functions, there's a nice indicator immediately available to us
+    /// so that we know whether or not this is an if statement. If we don't see it, we will
+    /// return with an error but the input stream will be clean. However, if we do see one,
+    /// and there's an error down the line, then there's nothing we can do.
+ fn parse_if_expression(&mut self) -> Result { + let start = self.require_keyword("if")?; + let test = self.parse_arithmetic(0)?; + let consequent = self.parse_block()?; + let mut alternative = None; + + if self.require_keyword("else").is_ok() { + alternative = Some(Box::new(self.parse_block()?)); + } + + let end = alternative + .as_ref() + .map(|x| x.location()) + .unwrap_or_else(|| consequent.location()); + + Ok(ConditionalExpr { + location: start.extend_to(&end), + test: Box::new(test), + consequent: Box::new(consequent), + alternative, + }) + } + + /// Parse a block. + /// + /// A block starts with an open brace -- so if we don't see one, we'll exit cleanly -- + /// but gets real complicated after that. So, once again, be thoughtful about how this + /// is called. + pub fn parse_block(&mut self) -> Result { + let start = self.require_token(Token::OpenBrace, "start of a block")?; + + let mut statements = vec![]; + let mut ended_with_expr = false; + + while let Some((stmt, terminal)) = self.parse_statement()? { + statements.push(stmt); + if terminal { + ended_with_expr = true; + break; + } + } + + let end = self.require_token(Token::CloseBrace, "end of a block")?; + + if !ended_with_expr { + let void_name = Name::new(end.clone(), "%prim%void"); + let void_ref = Expression::Reference(end.clone(), void_name); + let void_call = Expression::Call(Box::new(void_ref), CallKind::Normal, vec![]); + statements.push(Statement::Expression(void_call)); + } + + Ok(Expression::Block(start.extend_to(&end), statements)) + } + + /// Parse a statement, or return None if we're now done with parsing a block. + /// + /// We know we're done parsing a block when we hit a close brace, basically. We + /// should ignore excess semicolons cleanly, and that sort of thing. Because + /// statements vary pretty widely, you should not assume that the input is clean + /// on any sort of error. + pub fn parse_statement(&mut self) -> Result, ParserError> { + loop { + let next = self + .next()? 
+ .ok_or_else(|| self.bad_eof("looking for a statement or close brace"))?; + + match next.token { + Token::CloseBrace => { + self.save(next); + return Ok(None); + } + + Token::Semi => continue, + + Token::ValueName(ref l) if l == "let" => { + self.save(next); + return Ok(Some((Statement::Binding(self.parse_let()?), false))); + } + + _ => { + self.save(next); + let expr = Statement::Expression(self.parse_expression()?); + + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for semicolon or close brace"))?; + + if matches!(next.token, Token::Semi) { + return Ok(Some((expr, false))); + } else { + self.save(next); + return Ok(Some((expr, true))); + } + } + } + } + } + + /// Parse a let statement. + /// + /// This will assume that the first token in the stream is a "let", and be upset if + /// it is not. However, it will be upset cleanly, which is nice. + pub fn parse_let(&mut self) -> Result { + let start = self.require_keyword("let")?; + let mutable = self.require_keyword("mut").is_ok(); + let variable = self.parse_name("let binding")?; + let _ = self.require_operator("=")?; + let value = self.parse_expression()?; + let end = self.require_token(Token::Semi, "let statement")?; + + Ok(BindingStmt { + location: start.extend_to(&end), + mutable, + variable, + value, + }) + } + + /// Parse an arithmetic expression, obeying the laws of precedence. + /// + /// This is an implementation of Pratt Parsing, although I've probably done it in + /// a much more awkward way than necessary. I was heavily inspired and/or stole + /// code directly from [this + /// article](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html), + /// which was instrumental in its design. All errors mine. + /// + /// Note that because arithmetic expressions can start with so many tokens, you + /// should only call this function if you are absolutely sure that there's an + /// expression waiting for you, and it would be an error if there wasn't. 
+ pub fn parse_arithmetic(&mut self, level: u8) -> Result { + // start by checking for prefix operators. + let next = self + .next()? + .ok_or_else(|| self.bad_eof("looking for arithmetic expression"))?; + + let mut lhs = if let Token::OperatorName(ref n) = next.token { + if let Some(pre_prec) = self.prefix_precedence_table.get(n) { + if *pre_prec < level { + self.save(next.clone()); + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: next.span, + token: next.token, + expected: "a base expression of a tighter-binding prefix operator".into(), + }); + } + + let rhs = self.parse_arithmetic(*pre_prec)?; + let location = self.to_location(next.span); + let opname = Name::new(location.clone(), n); + let op_expr = Expression::Reference(location, opname); + + Expression::Call(Box::new(op_expr), CallKind::Prefix, vec![rhs]) + } else { + self.save(next); + self.parse_base_expression()? + } + } else { + self.save(next); + self.parse_base_expression()? + }; + + loop { + let Some(next) = self.next()? 
else { + return Ok(lhs); + }; + + match next.token { + Token::OpenParen => { + self.save(next); + let args = self.parse_call_arguments()?; + lhs = Expression::Call(Box::new(lhs), CallKind::Normal, args); + } + + Token::OperatorName(ref n) => { + if let Some(postprec) = self.postfix_precedence_table.get(n) { + if *postprec < level { + self.save(next); + break; + } + + let location = self.to_location(next.span); + let opname = Name::new(location.clone(), n); + let op_expr = Expression::Reference(location, opname); + + lhs = Expression::Call(Box::new(op_expr), CallKind::Postfix, vec![lhs]); + continue; + } + + let (left_pr, right_pr) = self.get_precedence(n); + + if left_pr < level { + self.save(next); + break; + } + + let rhs = self.parse_arithmetic(right_pr)?; + let location = self.to_location(next.span); + let name = Name::new(location.clone(), n); + let opref = Box::new(Expression::Reference(location, name)); + let args = vec![lhs, rhs]; + + lhs = Expression::Call(opref, CallKind::Infix, args); + } + + _ => { + self.save(next); + return Ok(lhs); + } + } + } + + Ok(lhs) + } + + /// Parse the arguments to a function call. + /// + /// We assume that, at this point, you have eaten the thing you're calling out of + /// the input stream, and are on the parenthesis that defines the arguments to the + /// function. If you're not there, then this will error, but in a way that you can + /// recover from. 
+    fn parse_call_arguments(&mut self) -> Result<Vec<Expression>, ParserError> {
+        let _ = self.require_token(Token::OpenParen, "for function arguments")?;
+        let mut args = vec![];
+
+        loop {
+            let next = self.next()?.ok_or_else(|| {
+                self.bad_eof("looking for an expression or close paren in function arguments")
+            })?;
+
+            if matches!(next.token, Token::CloseParen) {
+                break;
+            }
+
+            self.save(next);
+            let argument = self.parse_arithmetic(0)?;
+            args.push(argument);
+
+            let next = self.next()?.ok_or_else(|| {
+                self.bad_eof("looking for comma or close paren in function arguments")
+            })?;
+            match next.token {
+                Token::Comma => continue,
+                Token::CloseParen => break,
+                _ => {
+                    return Err(ParserError::UnexpectedToken {
+                        file: self.file.clone(),
+                        span: next.span,
+                        token: next.token,
+                        expected: "comma or close paren in function arguments".into(),
+                    });
+                }
+            }
+        }
+
+        Ok(args)
+    }
+
+    /// Parse a base expression.
+    ///
+    /// A base expression can be any number of things:
+    /// * A constant, of any form
+    /// * A variable name
+    /// * A constructor, like a structure constructor or an enumeration value
+    /// * A parenthesized expression of some other form
+    /// * A block
+    ///
+    /// Most of these can be identified by the first token in the input
+    /// stream. If we don't recognize a valid first token in the input
+    /// stream, we return an error and restore the original input stream
+    /// state. However, if the first token leads us to a valid next state,
+    /// we may not be able to recover the original stream state on an error.
+    ///
+    /// As a result, this should only be called when you're very confident
+    /// that the next thing is going to be an expression.
+    pub fn parse_base_expression(&mut self) -> Result<Expression, ParserError> {
+        if let Ok(v) = self.parse_constant() {
+            return Ok(Expression::Value(v));
+        }
+
+        let next = self
+            .next()?
+            .ok_or_else(|| self.bad_eof("looking for an expression"))?;
+
+        match next.token {
+            Token::OpenBrace => {
+                self.save(next);
+                self.parse_block()
+            }
+
+            Token::OpenParen => {
+                let inner = self.parse_expression()?;
+                self.require_token(Token::CloseParen, "the end of a parenthesized expression")?;
+                Ok(inner)
+            }
+
+            Token::TypeName(n) | Token::PrimitiveTypeName(n) => {
+                let type_name = Name::new(self.to_location(next.span.clone()), n);
+                let Some(after_type_name) = self.next()? else {
+                    return Ok(Expression::Reference(
+                        type_name.location().unwrap().clone(),
+                        type_name,
+                    ));
+                };
+
+                match after_type_name.token {
+                    Token::OpenBrace => {
+                        let mut fields = vec![];
+
+                        while let Some(field) = self.parse_field_value()? {
+                            fields.push(field);
+                        }
+
+                        let brace =
+                            self.require_token(Token::CloseBrace, "end of structure value")?;
+
+                        let sv = StructureExpr {
+                            location: self.to_location(next.span).extend_to(&brace),
+                            type_name,
+                            fields,
+                        };
+
+                        Ok(Expression::Structure(sv))
+                    }
+
+                    Token::DoubleColon => {
+                        let vname = self
+                            .next()?
+                            .ok_or_else(|| self.bad_eof("looking for enumeration value name"))?;
+
+                        let variant_name = match vname.token {
+                            Token::TypeName(s) => {
+                                let loc = self.to_location(vname.span.clone());
+                                Name::new(loc, s)
+                            }
+
+                            _ => {
+                                return Err(ParserError::UnexpectedToken {
+                                    file: self.file.clone(),
+                                    span: vname.span,
+                                    token: vname.token,
+                                    expected: "enumeration value name".into(),
+                                });
+                            }
+                        };
+
+                        let (argument, end_loc) = if let Some(maybe_paren) = self.next()?
+                        {
+                            if matches!(maybe_paren.token, Token::OpenParen) {
+                                let expr = self.parse_expression()?;
+                                let closer = self
+                                    .require_token(Token::CloseParen, "after variant argument")?;
+
+                                (Some(Box::new(expr)), closer)
+                            } else {
+                                self.save(maybe_paren);
+                                (None, self.to_location(vname.span))
+                            }
+                        } else {
+                            (None, self.to_location(vname.span))
+                        };
+
+                        let ev = EnumerationExpr {
+                            location: self.to_location(next.span).extend_to(&end_loc),
+                            type_name,
+                            variant_name,
+                            argument,
+                        };
+
+                        Ok(Expression::Enumeration(ev))
+                    }
+
+                    _ => {
+                        self.save(after_type_name);
+                        Ok(Expression::Reference(
+                            type_name.location().unwrap().clone(),
+                            type_name,
+                        ))
+                    }
+                }
+            }
+
+            Token::ValueName(n) | Token::PrimitiveValueName(n) => {
+                let location = self.to_location(next.span);
+                let name = Name::new(location.clone(), n);
+                Ok(Expression::Reference(location, name))
+            }
+
+            _ => {
+                self.save(next.clone());
+                Err(ParserError::UnexpectedToken {
+                    file: self.file.clone(),
+                    span: next.span,
+                    token: next.token,
+                    expected: "some base expression or an open brace".into(),
+                })
+            }
+        }
+    }
+
+    /// Parse a type from the input stream.
+    ///
+    /// Obviously, there are a lot of ways for things to not be a valid
+    /// function type. As it can, this will try to leave things in the
+    /// original state on an error, but that won't always be possible. So
+    /// it's probably best to only try to call this when you're sure there
+    /// should be a type sitting there.
+    pub fn parse_type(&mut self) -> Result<Type, ParserError> {
+        let mut args = Vec::new();
+
+        while let Ok(t) = self.parse_type_application() {
+            args.push(t);
+        }
+
+        let Some(maybe_arrow) = self.next()?
else { + match args.pop() { + None => { + return Err(ParserError::UnacceptableEof { + file: self.file.clone(), + place: "parsing function type or type".into(), + }); + } + + Some(t) if args.is_empty() => return Ok(t), + + Some(_) => { + return Err(ParserError::UnacceptableEof { + file: self.file.clone(), + place: "looking for '->' in function type".into(), + }); + } + } + }; + + if maybe_arrow.token == Token::Arrow { + let right = self.parse_type()?; + Ok(Type::Function(args, Box::new(right))) + } else if args.len() == 1 { + self.save(maybe_arrow); + Ok(args.pop().expect("length = 1 works")) + } else { + self.save(maybe_arrow.clone()); + let LocatedToken { token, span } = maybe_arrow; + + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span, + token, + expected: "'->' in function type".into(), + }) + } + } + + /// Parse a type application. + /// + /// Type applications must start with a type name (a capitalized variable + /// name). If we don't find one, we immediately error out. However if we + /// do find one, we will then eat as many base types as we can until we + /// run into an error. + /// + /// If we don't find a type name immediately, we will return an error but + /// leave the parse stream unchanged. If we parse a bunch of base types + /// correctly, the stream will be left at the start of the first non-base-type + /// token. However, this function can leave things in a weird state if there + /// is an open parenthesis that tries to enclose something that's not a type. 
+ fn parse_type_application(&mut self) -> Result { + let LocatedToken { token, span } = + self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; + + let constructor = match token { + Token::TypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Type::Constructor(self.to_location(span), name) + } + Token::PrimitiveTypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Type::Primitive(self.to_location(span), name) + } + _ => { + self.save(LocatedToken { token, span }); + return self.parse_base_type(); + } + }; + + let mut args = vec![]; + + while let Ok(next_arg) = self.parse_base_type() { + args.push(next_arg); + } + + Ok(Type::Application(Box::new(constructor), args)) + } + + /// Parse a base type from the input stream. + /// + /// A "base type" is a type variable, a primitive type name, a type name, + /// or a parenthesized version of some other type. This function will return + /// an error if it can't find one of these things, and will *attempt* to + /// return the stream unmodified in the event of an error. However, if it + /// sees a parenthesis and tries to parse a nested, complex type, it may + /// not be possible to recover the state precisely. + fn parse_base_type(&mut self) -> Result { + let LocatedToken { token, span } = + self.next()?.ok_or_else(|| self.bad_eof("parsing type"))?; + + match token { + Token::TypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Ok(Type::Constructor(self.to_location(span), name)) + } + Token::PrimitiveTypeName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Ok(Type::Primitive(self.to_location(span), name)) + } + Token::ValueName(x) => { + let name = Name::new(self.to_location(span.clone()), x); + Ok(Type::Variable(self.to_location(span), name)) + } + Token::OpenParen => { + let t = self.parse_type()?; + let closer = self + .next()? 
+ .ok_or_else(|| self.bad_eof("close paren in type"))?; + + if !matches!(closer.token, Token::CloseParen) { + return Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: closer.span, + token: closer.token, + expected: "close parenthesis to finish a type".into(), + }); + } + + Ok(t) + } + token => { + self.save(LocatedToken { + token: token.clone(), + span: span.clone(), + }); + + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span, + token, + expected: "type constructor, type variable, or primitive type".into(), + }) + } + } + } + + /// Try to parse a constant value from the input stream. + /// + /// If we don't find a name, the stream should be returned in the same state + /// at which it entered this function. + pub(crate) fn parse_constant(&mut self) -> Result { + let maybe_constant = self + .next()? + .ok_or_else(|| self.bad_eof("looking for a constant"))?; + + match maybe_constant.token { + Token::Integer(iwb) => Ok(ConstantValue::Integer( + self.to_location(maybe_constant.span), + iwb, + )), + Token::Character(c) => Ok(ConstantValue::Character( + self.to_location(maybe_constant.span), + c, + )), + Token::String(s) => Ok(ConstantValue::String( + self.to_location(maybe_constant.span), + s, + )), + _ => { + self.save(maybe_constant.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_constant.span, + token: maybe_constant.token, + expected: "constant value".into(), + }) + } + } + } + + /// Try to parse a name from the input stream. + /// + /// If we don't find a name, the stream should be returned in the same state + /// at which it entered this function. + fn parse_name(&mut self, place: &'static str) -> Result { + let maybe_name = self + .next()? 
+ .ok_or_else(|| self.bad_eof(format!("looking for a name in {place}")))?; + + if let Token::ValueName(x) = maybe_name.token { + Ok(Name::new(self.to_location(maybe_name.span), x)) + } else { + self.save(maybe_name.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_name.span, + token: maybe_name.token, + expected: format!("looking for a name in {place}"), + }) + } + } + + /// Try to parse a type name from the input stream. + /// + /// If we don't find a name, the stream should be returned in the same state + /// at which it entered this function. + fn parse_type_name(&mut self, place: &'static str) -> Result { + let maybe_name = self + .next()? + .ok_or_else(|| self.bad_eof(format!("looking for a type name in {place}")))?; + + if let Token::TypeName(x) = maybe_name.token { + Ok(Name::new(self.to_location(maybe_name.span), x)) + } else { + self.save(maybe_name.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_name.span, + token: maybe_name.token, + expected: format!("looking for a type name in {place}"), + }) + } + } + + /// Try to parse an operator from the input stream. + /// + /// If we don't find a name, the stream should be returned in the same state + /// at which it entered this function. + fn parse_operator_name(&mut self, place: &'static str) -> Result { + let maybe_name = self + .next()? 
+ .ok_or_else(|| self.bad_eof(format!("looking for an operator name in {place}")))?; + + if let Token::OperatorName(x) = maybe_name.token { + Ok(Name::new(self.to_location(maybe_name.span), x)) + } else { + self.save(maybe_name.clone()); + Err(ParserError::UnexpectedToken { + file: self.file.clone(), + span: maybe_name.span, + token: maybe_name.token, + expected: format!("looking for an operator name in {place}"), + }) + } + } +} diff --git a/src/syntax/parser.lalrpop b/src/syntax/parser.lalrpop new file mode 100644 index 0000000..5d3936f --- /dev/null +++ b/src/syntax/parser.lalrpop @@ -0,0 +1,70 @@ +use crate::syntax::*; +use crate::syntax::error::ParserError; +use crate::syntax::tokens::*; + +grammar(file_id: usize); + +extern { + type Location = usize; + type Error = ParserError; + + enum Token { + "(" => Token::OpenParen, + ")" => Token::CloseParen, + "[" => Token::OpenSquare, + "]" => Token::CloseSquare, + "{" => Token::OpenBrace, + "}" => Token::CloseBrace, + ";" => Token::Semi, + ":" => Token::Colon, + "," => Token::Comma, + "`" => Token::BackTick, + "\\" => Token::Lambda(_), + "->" => Token::Arrow, + + "" => Token::TypeName(), + "" => Token::ValueName(), + "" => Token::OperatorName(), + "" => Token::PrimitiveTypeName(), + "" => Token::PrimitiveValueName(), + "" => Token::Integer(), + "" => Token::Character(), + "" => Token::String(), + } +} + +pub Type: Type = { + FunctionType, +} + +FunctionType: Type = { + TypeApplication, + "->" => + Type::Function(Box::new(argtype), Box::new(ret)), +} + +TypeApplication: Type = { + BaseType, + "> => { + let constructor = Type::Constructor(Location::new(file_id, s..e), c); + Type::Application(Box::new(constructor), arguments) + }, + "> => { + let constructor = Type::Constructor(Location::new(file_id, s..e), c); + Type::Application(Box::new(constructor), arguments) + }, +} + +BaseType: Type = { + "> => + Type::Variable(Location::new(file_id, s..e), v), + "> => + Type::Primitive(Location::new(file_id, s..e), p), + "(" ")" => 
t, +} + +pub ConstantValue: ConstantValue = { + "> => ConstantValue::Integer(Location::new(file_id, s..e), x), + "> => ConstantValue::Character(Location::new(file_id, s..e), x), + "> => ConstantValue::String(Location::new(file_id, s..e), x), +} diff --git a/src/syntax/parser_tests.rs b/src/syntax/parser_tests.rs new file mode 100644 index 0000000..0a214a4 --- /dev/null +++ b/src/syntax/parser_tests.rs @@ -0,0 +1,1500 @@ +use crate::syntax::error::ParserError; +use crate::syntax::parse::Parser; +use crate::syntax::tokens::{Lexer, Token}; +use crate::syntax::*; + +#[test] +fn constants() { + let parse_constant = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_constant() + }; + + assert!(matches!( + parse_constant("16"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: None, + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0x10"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(16), + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0o20"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(8), + value: 16, + } + )) + )); + assert!(matches!( + parse_constant("0b10000"), + Ok(ConstantValue::Integer( + _, + IntegerWithBase { + base: Some(2), + value: 16, + } + )) + )); + assert!( + matches!(parse_constant("\"foo\""), Ok(ConstantValue::String(_, x)) + if x == "foo") + ); + assert!(matches!( + parse_constant("'f'"), + Ok(ConstantValue::Character(_, 'f')) + )); +} + +#[test] +fn types() { + let parse_type = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_type() + }; + + assert!(matches!( + parse_type("Cons"), + Ok(Type::Application(cons, empty)) if + matches!(cons.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && + empty.is_empty() + )); + assert!(matches!( + parse_type("cons"), + Ok(Type::Variable(_, c)) if c.as_printed() == "cons" + )); + assert!(matches!( + 
parse_type("Cons a b"), + Ok(Type::Application(a, b)) + if matches!(a.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && + matches!(b.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] + if b1.as_printed() == "a" && b2.as_printed() == "b") + )); + assert!(matches!( + parse_type("a -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1.as_printed() == "a") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") + )); + assert!(matches!( + parse_type("(a -> z)"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1)] if a1.as_printed() == "a") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") + )); + assert!(matches!( + parse_type("a b -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Variable(_, a1), Type::Variable(_, b1)] + if a1.as_printed() == "a" && b1.as_printed() == "b") && + matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") + )); + assert!(matches!( + parse_type("Cons a b -> z"), + Ok(Type::Function(a, z)) + if matches!(a.as_slice(), [Type::Application(cons, appargs)] + if matches!(cons.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && + matches!(appargs.as_slice(), [Type::Variable(_, b1), Type::Variable(_, b2)] + if b1.as_printed() == "a" && b2.as_printed() == "b")) && + matches!(z.as_ref(), Type::Variable(_, z1) if z1.as_printed() == "z") + )); + assert!(parse_type("Cons a b ->").is_err()); + assert!(parse_type("(Cons a b) (Cons a b)").is_err()); + assert!(parse_type("(Cons a b) (Cons a b) :").is_err()); +} + +#[test] +fn type_restrictions() { + let parse_tr = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_type_restrictions() + }; + + assert!(matches!( + parse_tr("restrict()"), + Ok(TypeRestrictions{ restrictions }) if restrictions.is_empty() + )); + + assert!(matches!( + parse_tr("restrict(prim%Cons a b)"), + 
Ok(TypeRestrictions { restrictions }) if restrictions.len() == 1 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Primitive(_, x) if x.as_printed() == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); + + assert!(matches!( + parse_tr("restrict(Cons a b)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 1 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); + + assert!(matches!( + parse_tr("restrict(Cons a b,)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 1 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")))); + + assert!(parse_tr("restrict(cons a b,)").is_err()); + + assert!(parse_tr("restrict(,Cons a b,)").is_err()); + + assert!(matches!( + parse_tr("restrict(Cons a b, Monad m)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 2 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")) && + matches!(&restrictions[1], TypeRestriction { + constructor, + arguments, + } 
if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Monad") && + arguments.len() == 1 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "m")))); + + assert!(matches!( + parse_tr("restrict(Cons a b, Monad m,)"), + Ok(TypeRestrictions { restrictions }) if restrictions.len() == 2 && + matches!(&restrictions[0], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Cons") && + arguments.len() == 2 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "a") && + matches!(&arguments[1], Type::Variable(_, x) if x.as_printed() == "b")) && + matches!(&restrictions[1], TypeRestriction { + constructor, + arguments, + } if matches!(constructor, Type::Constructor(_, x) if x.as_printed() == "Monad") && + arguments.len() == 1 && + matches!(&arguments[0], Type::Variable(_, x) if x.as_printed() == "m")))); +} + +#[test] +fn field_definition() { + let parse_fd = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_field_definition() + }; + + assert!(parse_fd("foo").is_err()); + assert!(matches!( + parse_fd("foo,"), + Ok(Some(StructureField{ name, export: ExportClass::Private, field_type: None, .. })) + if name.as_printed() == "foo" + )); + assert!(matches!( + parse_fd("foo}"), + Ok(Some(StructureField{ name, export: ExportClass::Private, field_type: None, .. })) + if name.as_printed() == "foo" + )); + + assert!(matches!( + parse_fd("foo: Word8,"), + Ok(Some(StructureField{ name, field_type, .. })) + if name.as_printed() == "foo" && + matches!(&field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Word8") && + args.is_empty()))); + + assert!(parse_fd("foo :: Word8,").is_err()); + assert!(parse_fd("foo: Word8;").is_err()); + + assert!(matches!( + parse_fd("foo: Cons a b,"), + Ok(Some(StructureField{ name, field_type, .. 
})) + if name.as_printed() == "foo" && + matches!(&field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Cons") && + matches!(&args.as_slice(), &[Type::Variable(_, v1), Type::Variable(_, v2)] + if v1.as_printed() == "a" && v2.as_printed() == "b")))); + + assert!(matches!( + parse_fd("foo: a -> b,"), + Ok(Some(StructureField{ name, field_type, .. })) + if name.as_printed() == "foo" && + matches!(&field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a.as_printed() == "a") && + matches!(ret.as_ref(), Type::Variable(_, b) if b.as_printed() == "b")))); + + assert!(matches!( + parse_fd("export foo: a -> b,"), + Ok(Some(StructureField{ name, export: ExportClass::Public, field_type, .. })) + if name.as_printed() == "foo" && + matches!(&field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, a)] if a.as_printed() == "a") && + matches!(ret.as_ref(), Type::Variable(_, b) if b.as_printed() == "b")))); +} + +#[test] +fn structures() { + let parse_st = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_structure() + }; + + assert!(parse_st("structure { }").is_err()); + assert!(parse_st("structure {").is_err()); + assert!(parse_st("structure foo {}").is_err()); + + assert!(matches!( + parse_st("structure Foo {}"), + Ok(StructureDef { name, fields, .. }) + if name.as_printed() == "Foo" && fields.is_empty())); + + assert!(matches!( + parse_st("structure Foo { bar }"), + Ok(StructureDef { name, fields, .. }) + if name.as_printed() == "Foo" && + matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }] + if name.as_printed() == "bar" && field_type.is_none()))); + + assert!(matches!( + parse_st("structure Foo { bar: Word8 }"), + Ok(StructureDef { name, fields, .. 
}) + if name.as_printed() == "Foo" && + matches!(fields.as_slice(), &[StructureField { ref name, ref field_type, .. }] + if name.as_printed() == "bar" && + matches!(field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Word8") && + args.is_empty())))); + + assert!(matches!( + parse_st("structure Foo { bar: Word8, goo }"), + Ok(StructureDef { name, fields, .. }) + if name.as_printed() == "Foo" && + matches!(fields.as_slice(), + &[StructureField { ref name, ref field_type, .. }, + StructureField { name: ref name2, field_type: None, .. }] + if name.as_printed() == "bar" && + name2.as_printed() == "goo" && + matches!(field_type, Some(Type::Application(c, args)) + if matches!(c.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Word8") && + args.is_empty())))); + + assert!(matches!( + parse_st("structure Foo { bar: b c -> a, goo }"), + Ok(StructureDef { name, fields, .. }) + if name.as_printed() == "Foo" && + matches!(fields.as_slice(), + &[StructureField { ref name, ref field_type, .. }, + StructureField { name: ref name2, field_type: None, .. }] + if name.as_printed() == "bar" && + name2.as_printed() == "goo" && + matches!(field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)] + if b.as_printed() == "b" && c.as_printed() == "c") && + matches!(ret.as_ref(), Type::Variable(_, a) if a.as_printed() == "a"))))); + + assert!(matches!( + parse_st("structure Foo { bar: b c -> a, goo, }"), + Ok(StructureDef { name, fields, .. }) + if name.as_printed() == "Foo" && + matches!(fields.as_slice(), + &[StructureField { ref name, ref field_type, .. }, + StructureField { name: ref name2, field_type: None, .. 
}] + if name.as_printed() == "bar" && + name2.as_printed() == "goo" && + matches!(field_type, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, b), Type::Variable(_, c)] + if b.as_printed() == "b" && c.as_printed() == "c") && + matches!(ret.as_ref(), Type::Variable(_, a) if a.as_printed() == "a"))))); +} + +#[test] +fn enum_variant() { + let parse_ev = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_enum_variant() + }; + + assert!(matches!(parse_ev("foo"), Ok(None))); + assert!(matches!(parse_ev("foo,"), Ok(None))); + assert!(parse_ev("Cons foo,").is_err()); + assert!(matches!(parse_ev(""), Ok(None))); + + assert!(matches!(parse_ev("}"), Ok(None))); + + assert!(matches!( + parse_ev("Cons,"), + Ok(Some(EnumerationVariant { name, argument, .. })) + if name.as_printed() == "Cons" && argument.is_none())); + assert!(matches!( + parse_ev("Cons }"), + Ok(Some(EnumerationVariant { name, argument, .. })) + if name.as_printed() == "Cons" && argument.is_none())); + assert!(matches!( + parse_ev("Cons, }"), + Ok(Some(EnumerationVariant { name, argument, .. })) + if name.as_printed() == "Cons" && argument.is_none())); + + assert!(matches!( + parse_ev("Cons(Pair a),"), + Ok(Some(EnumerationVariant { name, ref argument, .. })) + if name.as_printed() == "Cons" && + matches!(argument, Some(Type::Application(typef, args)) + if matches!(typef.as_ref(), Type::Constructor(_, name) + if name.as_printed() == "Pair") && + matches!(&args.as_slice(), &[Type::Variable(_, argname)] + if argname.as_printed() == "a")))); + assert!(matches!( + parse_ev("Cons(Pair a) }"), + Ok(Some(EnumerationVariant { name, ref argument, .. 
})) + if name.as_printed() == "Cons" && + matches!(argument, Some(Type::Application(typef, args)) + if matches!(typef.as_ref(), Type::Constructor(_, name) + if name.as_printed() == "Pair") && + matches!(&args.as_slice(), &[Type::Variable(_, argname)] + if argname.as_printed() == "a")))); + + assert!(matches!( + parse_ev("Cons(a b -> c) }"), + Ok(Some(EnumerationVariant { name, ref argument, .. })) + if name.as_printed() == "Cons" && + matches!(argument, Some(Type::Function(args, ret)) + if matches!(&args.as_slice(), &[Type::Variable(_, a), Type::Variable(_, b)] + if a.as_printed() == "a" && b.as_printed() == "b") && + matches!(ret.as_ref(), Type::Variable(_, c) if c.as_printed() == "c")))); +} + +#[test] +fn enumerations() { + let parse_en = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_enumeration() + }; + + assert!(parse_en("enumeration { }").is_err()); + assert!(parse_en("enumeration {").is_err()); + assert!(parse_en("enumeration").is_err()); + + assert!(matches!( + parse_en("enumeration Empty { }"), + Ok(EnumerationDef { name, variants, .. }) + if name.as_printed() == "Empty" && variants.is_empty())); + assert!(matches!( + parse_en("enumeration Alternates { A, B }"), + Ok(EnumerationDef { name, variants, .. }) + if name.as_printed() == "Alternates" && + matches!(&variants.as_slice(), &[ + EnumerationVariant { name: name1, argument: arg1, ..}, + EnumerationVariant { name: name2, argument: arg2, ..}, + ] if name1.as_printed() == "A" && arg1.is_none() && + name2.as_printed() == "B" && arg2.is_none()))); + assert!(parse_en("enumeration Alternates { A").is_err()); + assert!(parse_en("enumeration Alternates { A; B }").is_err()); + assert!(matches!( + parse_en("enumeration Alternates { A, B, }"), + Ok(EnumerationDef { name, variants, .. 
}) + if name.as_printed() == "Alternates" && + matches!(&variants.as_slice(), &[ + EnumerationVariant { name: name1, argument: arg1, ..}, + EnumerationVariant { name: name2, argument: arg2, ..}, + ] if name1.as_printed() == "A" && arg1.is_none() && + name2.as_printed() == "B" && arg2.is_none()))); +} + +#[test] +fn expressions() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(parse_ex("").is_err()); + assert!(matches!( + parse_ex("x"), + Ok(Expression::Reference(_,n)) if n.as_printed() == "x")); + assert!(matches!( + parse_ex("X"), + Ok(Expression::Reference(_,n)) if n.as_printed() == "X")); + assert!(matches!( + parse_ex("(x)"), + Ok(Expression::Reference(_,n)) if n.as_printed() == "x")); + assert!(matches!( + parse_ex("'c'"), + Ok(Expression::Value(ConstantValue::Character(_, _))) + )); + assert!(matches!( + parse_ex("\"c\""), + Ok(Expression::Value(ConstantValue::String(_, _))) + )); + assert!(matches!( + parse_ex("1"), + Ok(Expression::Value(ConstantValue::Integer(_, _))) + )); + assert!(matches!( + parse_ex("(1)"), + Ok(Expression::Value(ConstantValue::Integer(_, _))) + )); +} + +#[test] +fn enumeration_values() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(parse_ex("Hello::world").is_err()); + assert!(parse_ex("Hello::world(a,b)").is_err()); + assert!(matches!( + parse_ex("Hello::World"), + Ok(Expression::Enumeration(ev)) + if ev.type_name.as_printed() == "Hello" && + ev.variant_name.as_printed() == "World" && + ev.argument.is_none())); + assert!(matches!( + parse_ex("Hello::World(a)"), + Ok(Expression::Enumeration(ev)) + if ev.type_name.as_printed() == "Hello" && + ev.variant_name.as_printed() == "World" && + ev.argument.is_some())); + assert!(matches!( + parse_ex("Hello::World + 1"), + Ok(Expression::Call(plus, CallKind::Infix, args)) if + 
matches!(plus.as_ref(), Expression::Reference(_, n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Enumeration(_), + Expression::Value(_) + ]))); +} + +#[test] +fn structure_value() { + let parse_st = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(parse_st("Foo{").is_err()); + assert!(parse_st("Foo{ , }").is_err()); + assert!(parse_st("Foo{ foo, }").is_err()); + assert!(parse_st("Foo{ foo: , }").is_err()); + assert!(parse_st("Foo{ , foo: 1, }").is_err()); + assert!(matches!( + parse_st("Foo{ foo: 1 }"), + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field, value }] + if field.as_printed() == "foo" && + matches!(value, Expression::Value(ConstantValue::Integer(_,_)))))); + assert!(matches!( + parse_st("Foo{ foo: 1, }"), + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field, value }] + if field.as_printed() == "foo" && + matches!(value, Expression::Value(ConstantValue::Integer(_,_)))))); + assert!(matches!( + parse_st("Foo{ foo: 1, bar: \"foo\" }"), + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field: f1, value: v1 }, + FieldValue{ field: f2, value: v2 }] + if f1.as_printed() == "foo" && + f2.as_printed() == "bar" && + matches!(v1, Expression::Value(ConstantValue::Integer(_,_))) && + matches!(v2, Expression::Value(ConstantValue::String(_,_)))))); + assert!(matches!( + parse_st("Foo{ foo: 1, bar: \"foo\", }"), + Ok(Expression::Structure(sv)) + if sv.type_name.as_printed() == "Foo" && + matches!(sv.fields.as_slice(), [FieldValue{ field: f1, value: v1 }, + FieldValue{ field: f2, value: v2 }] + if f1.as_printed() == "foo" && + f2.as_printed() == "bar" && + matches!(v1, Expression::Value(ConstantValue::Integer(_,_))) && + matches!(v2, 
Expression::Value(ConstantValue::String(_,_)))))); + assert!(parse_st("Foo{ foo: 1,, bar: \"foo\", }").is_err()); +} + +#[test] +fn infix_and_precedence() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.add_infix_precedence("+", parse::Associativity::Left, 6); + result.add_infix_precedence("*", parse::Associativity::Right, 7); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("0"), + Ok(Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value, .. }))) + if value == 0)); + assert!(matches!( + parse_ex("(0)"), + Ok(Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value, .. }))) + if value == 0)); + assert!(matches!( + parse_ex("((0))"), + Ok(Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value, .. }))) + if value == 0)); + assert!(matches!( + parse_ex("1 + 2"), + Ok(Expression::Call(plus, CallKind::Infix, args)) + if matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })) + ] if *v1 == 1 && *v2 == 2))); + assert!(matches!( + parse_ex("1 + 2 + 3"), + Ok(Expression::Call(plus, CallKind::Infix, args)) + if matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(innerplus, CallKind::Infix, inner_args), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v3 == 3 && + matches!(innerplus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. 
})) + ] if *v1 == 1 && *v2 == 2)))); + assert!(matches!( + parse_ex("1 * 2 * 3"), + Ok(Expression::Call(times, CallKind::Infix, args)) + if matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Call(innertimes, CallKind::Infix, inner_args), + ] if *v1 == 1 && + matches!(innertimes.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v2 == 2 && *v3 == 3)))); + + assert!(matches!( + parse_ex("1 + 2 * 3 + 4"), + Ok(Expression::Call(plus_right, CallKind::Infix, outer_args)) if + matches!(plus_right.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(outer_args.as_slice(), [ + Expression::Call(plus_left, CallKind::Infix, left_args), + Expression::Value(ConstantValue::Integer(_, v4)) + ] if + matches!(v4, IntegerWithBase{ value: 4, .. }) && + matches!(plus_left.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(left_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v1)), + Expression::Call(times, CallKind::Infix, times_args) + ] if + matches!(v1, IntegerWithBase{ value: 1, .. }) && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(times_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v2)), + Expression::Value(ConstantValue::Integer(_, v3)) + ] if + matches!(v2, IntegerWithBase{ value: 2, .. }) && + matches!(v3, IntegerWithBase{ value: 3, .. 
})))))); + + assert!(matches!( + parse_ex("1 * 2 + 3 * 4"), + Ok(Expression::Call(plus, CallKind::Infix, outer_args)) if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(outer_args.as_slice(), [ + Expression::Call(left_times, CallKind::Infix, left_args), + Expression::Call(right_times, CallKind::Infix, right_args) + ] if + matches!(left_times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(right_times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(left_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v1)), + Expression::Value(ConstantValue::Integer(_, v2)), + ] if + matches!(v1, IntegerWithBase { value: 1, .. }) && + matches!(v2, IntegerWithBase { value: 2, .. })) && + matches!(right_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v3)), + Expression::Value(ConstantValue::Integer(_, v4)), + ] if + matches!(v3, IntegerWithBase { value: 3, .. }) && + matches!(v4, IntegerWithBase { value: 4, .. 
}))))); +} + +#[test] +fn calls() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.add_infix_precedence("+", parse::Associativity::Left, 6); + result.add_infix_precedence("*", parse::Associativity::Right, 7); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("f()"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + args.is_empty())); + assert!(parse_ex("f(").is_err()); + assert!(parse_ex("f(a").is_err()); + assert!(parse_ex("f(a,b").is_err()); + assert!(parse_ex("f(a,b,").is_err()); + assert!(parse_ex("f(a,b ::").is_err()); + assert!(matches!( + parse_ex("f(a)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a"))); + assert!(matches!( + parse_ex("f(a,b)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Reference(_,a), + Expression::Reference(_,b), + ] if a.as_printed() == "a" && b.as_printed() == "b"))); + assert!(matches!( + parse_ex("f(a,b,)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Reference(_,a), + Expression::Reference(_,b), + ] if a.as_printed() == "a" && b.as_printed() == "b"))); + assert!(matches!( + parse_ex("f(A,b,)"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Reference(_,a), + Expression::Reference(_,b), + ] if a.as_printed() == "A" && b.as_printed() == "b"))); + assert!(parse_ex("f(,a,b,)").is_err()); + 
assert!(parse_ex("f(a,,b,)").is_err()); + assert!(parse_ex("f(a,b,,)").is_err()); + + assert!(matches!( + parse_ex("f()()"), + Ok(Expression::Call(f, CallKind::Normal, args)) if + matches!(f.as_ref(), Expression::Call(inner, CallKind::Normal, inner_args) if + matches!(inner.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + inner_args.is_empty()) && + args.is_empty())); + + assert!(matches!( + parse_ex("f() + 1"), + Ok(Expression::Call(plus, CallKind::Infix, args)) if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(subcall, CallKind::Normal, subargs), + Expression::Value(ConstantValue::Integer(_, v1)) + ] if + matches!(v1, IntegerWithBase{ value: 1, .. }) && + matches!(subcall.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + subargs.is_empty()))); + + assert!(matches!( + parse_ex("f(a + b, c*d)"), + Ok(Expression::Call(eff, CallKind::Normal, args)) if + matches!(eff.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Call(plus, CallKind::Infix, pargs), + Expression::Call(times, CallKind::Infix, targs), + ] if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(pargs.as_slice(), [ Expression::Reference(_,a), Expression::Reference(_,b) ] if + a.as_printed() == "a" && b.as_printed() == "b") && + matches!(targs.as_slice(), [ Expression::Reference(_,c), Expression::Reference(_,d) ] if + c.as_printed() == "c" && d.as_printed() == "d")))); + + assert!(matches!( + parse_ex("f(a + b, c*d,)"), + Ok(Expression::Call(eff, CallKind::Normal, args)) if + matches!(eff.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(args.as_slice(), [ + Expression::Call(plus, CallKind::Infix, pargs), + Expression::Call(times, CallKind::Infix, targs), + ] if + 
matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(pargs.as_slice(), [ Expression::Reference(_,a), Expression::Reference(_,b) ] if + a.as_printed() == "a" && b.as_printed() == "b") && + matches!(targs.as_slice(), [ Expression::Reference(_,c), Expression::Reference(_,d) ] if + c.as_printed() == "c" && d.as_printed() == "d")))); + + assert!(matches!( + parse_ex("3 + f(1 + 2)"), + Ok(Expression::Call(plus, CallKind::Infix, args)) if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v3)), + Expression::Call(eff, CallKind::Normal, fargs) + ] if + matches!(v3, IntegerWithBase{ value: 3, .. }) && + matches!(eff.as_ref(), Expression::Reference(_,n) if n.as_printed() == "f") && + matches!(fargs.as_slice(), [Expression::Call(p, CallKind::Infix, pargs)] if + matches!(p.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [Expression::Value(v1), Expression::Value(v2)] if + matches!(v1, ConstantValue::Integer(_, IntegerWithBase { value: 1, .. })) && + matches!(v2, ConstantValue::Integer(_, IntegerWithBase { value: 2, .. }))))))); + + assert!(matches!( + parse_ex("(f . 
g)(1 + 2)"), + Ok(Expression::Call(fg, CallKind::Normal, args)) if + matches!(fg.as_ref(), Expression::Call(dot, CallKind::Infix, fgargs) if + matches!(dot.as_ref(), Expression::Reference(_,n) if n.as_printed() == ".") && + matches!(fgargs.as_slice(), [Expression::Reference(_,f), Expression::Reference(_,g)] if + f.as_printed() == "f" && g.as_printed() == "g")) && + matches!(args.as_slice(), [Expression::Call(plus, CallKind::Infix, pargs)] if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [Expression::Value(v1), Expression::Value(v2)] if + matches!(v1, ConstantValue::Integer(_, IntegerWithBase{ value: 1, .. })) && + matches!(v2, ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. })))))); + + assert!(matches!( + parse_ex("a + b(2 + 3) * c"), + Ok(Expression::Call(plus, CallKind::Infix, pargs)) if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [ + Expression::Reference(_,a), + Expression::Call(times, CallKind::Infix, targs) + ] if a.as_printed() == "a" && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(targs.as_slice(), [ + Expression::Call(b, CallKind::Normal, bargs), + Expression::Reference(_,c), + ] if c.as_printed() == "c" && + matches!(b.as_ref(), Expression::Reference(_,n) if n.as_printed() == "b") && + matches!(bargs.as_slice(), [Expression::Call(plus, CallKind::Infix, pargs)] if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(pargs.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 2, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: 3, .. 
})) + ])))))); +} + +#[test] +fn prefix_and_postfix() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.add_infix_precedence("+", parse::Associativity::Left, 4); + result.add_infix_precedence("*", parse::Associativity::Left, 8); + result.add_prefix_precedence("++", 6); + result.add_postfix_precedence("++", 6); + result.add_prefix_precedence("--", 7); + result.add_postfix_precedence("--", 7); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("++a"), + Ok(Expression::Call(pp, CallKind::Prefix, args)) if + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a"))); + + assert!(matches!( + parse_ex("a--"), + Ok(Expression::Call(pp, CallKind::Postfix, args)) if + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a"))); + + // the prefix is weaker than the postfix, so it should be the outside + // operator + assert!(matches!( + parse_ex("++a--"), + Ok(Expression::Call(pp, CallKind::Prefix, args)) if + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Call(mm, CallKind::Postfix, args)] if + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "--") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a")))); + + // the prefix is stronger than the postfix, so it should be the inside + // operator + assert!(matches!( + parse_ex("--a++"), + Ok(Expression::Call(pp, CallKind::Postfix, args)) if + matches!(pp.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Call(mm, CallKind::Prefix, args)] if + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "--") && + matches!(args.as_slice(), 
[Expression::Reference(_,n)] if n.as_printed() == "a")))); + + assert!(matches!( + parse_ex("a++ + b"), + Ok(Expression::Call(p, CallKind::Infix, args)) if + matches!(p.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(mm, CallKind::Postfix, args), + Expression::Reference(_,n) + ] if n.as_printed() == "b" && + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "a")))); + + assert!(matches!( + parse_ex("a + ++ b"), + Ok(Expression::Call(p, CallKind::Infix, args)) if + matches!(p.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Reference(_,n), + Expression::Call(mm, CallKind::Prefix, args), + ] if n.as_printed() == "a" && + matches!(mm.as_ref(), Expression::Reference(_,n) if n.as_printed() == "++") && + matches!(args.as_slice(), [Expression::Reference(_,n)] if n.as_printed() == "b")))); + + assert!(matches!( + parse_ex("a * ++ b"), + Err(ParserError::UnexpectedToken{ token: Token::OperatorName(pp), .. }) + if pp == "++")); + + // a slightly weird case: a bare operator with no operands ("**") should + // fail to parse as an expression (presumably lexed as one operator token). 
+ assert!(parse_ex("**").is_err()); +} + +#[test] +fn blocks() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("{}"), + Ok(Expression::Block(_, void)) if + matches!(void.as_slice(), [Statement::Expression(call)] if + matches!(call, Expression::Call(void, CallKind::Normal, vargs) if + matches!(void.as_ref(), Expression::Reference(_,n) if + n.as_printed() == "%prim%void") && + vargs.is_empty())))); + assert!(matches!( + parse_ex("{ x }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if + n.as_printed() == "x"))); + assert!(matches!( + parse_ex("{ x; }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [ + Statement::Expression(Expression::Reference(_,n)), + Statement::Expression(Expression::Call(primv, CallKind::Normal, vargs)), + ] if n.as_printed() == "x" && vargs.is_empty() && + matches!(primv.as_ref(), Expression::Reference(_,n) if + n.as_printed() == "%prim%void")))); + assert!(matches!( + parse_ex("{ x;;; y }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [ + Statement::Expression(Expression::Reference(_,x)), + Statement::Expression(Expression::Reference(_,y)), + ] if x.as_printed() == "x" && y.as_printed() == "y"))); + assert!(matches!( + parse_ex("{ x; y }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [ + Statement::Expression(Expression::Reference(_,x)), + Statement::Expression(Expression::Reference(_,y)), + ] if x.as_printed() == "x" && y.as_printed() == "y"))); +} + +#[test] +fn bindings() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("{ let x = y; }"), + Ok(Expression::Block(_, x)) if + matches!(x.as_slice(), [Statement::Binding(b), Statement::Expression(_)] if + !b.mutable && + b.variable.as_printed() == 
"x" && + matches!(b.value, Expression::Reference(_,ref n) if n.as_printed() == "y")))); +} + +#[test] +fn conditionals() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("if x { y } else { z }"), + Ok(Expression::Conditional(cond)) if + matches!(cond.test.as_ref(), Expression::Reference(_,n) if n.as_printed() == "x") && + matches!(cond.consequent.as_ref(), Expression::Block(_, cs) if + matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if + n.as_printed() == "y")) && + matches!(cond.alternative.as_ref(), Some(expr) if + matches!(expr.as_ref(), Expression::Block(_, ast) if + matches!(ast.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if + n.as_printed() == "z"))))); + + assert!(matches!( + parse_ex("if x { y }"), + Ok(Expression::Conditional(cond)) if + matches!(cond.test.as_ref(), Expression::Reference(_,n) if n.as_printed() == "x") && + matches!(cond.consequent.as_ref(), Expression::Block(_, cs) if + matches!(cs.as_slice(), [Statement::Expression(Expression::Reference(_,n))] if + n.as_printed() == "y")) && + cond.alternative.is_none())); + + assert!(parse_ex("if x v { z }").is_err()); + + assert!(matches!( + parse_ex("if x + y { z }"), + Ok(Expression::Conditional(cond)) if + matches!(cond.test.as_ref(), Expression::Call(_, CallKind::Infix, _)))); +} + +#[test] +#[allow(clippy::get_first)] +fn patterns() { + let parse_pat = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_pattern() + }; + + assert!(matches!( + parse_pat("1"), + Ok(Pattern::Constant(ConstantValue::Integer(_, + IntegerWithBase { value, .. 
}))) if + value == 1)); + assert!(matches!( + parse_pat("x"), + Ok(Pattern::Variable(n)) if n.as_printed() == "x")); + assert!(matches!( + parse_pat("Cons::Pair(pair)"), + Ok(Pattern::EnumerationValue(EnumerationPattern{ + type_name, variant_name, argument: Some(subpat), .. + })) if + type_name.as_printed() == "Cons" && + variant_name.as_printed() == "Pair" && + matches!(subpat.as_ref(), Pattern::Variable(p) if + p.as_printed() == "pair"))); + assert!(matches!( + parse_pat("Structure{ field, other: something }"), + Ok(Pattern::Structure(StructurePattern { type_name, fields, .. })) if + type_name.as_printed() == "Structure" && + fields.len() == 2 && + matches!(fields.get(0), Some((n, None)) if n.as_printed() == "field") && + matches!(fields.get(1), Some((n, Some(Pattern::Variable(s)))) if + n.as_printed() == "other" && + s.as_printed() == "something"))); + assert!(matches!( + parse_pat("Enumeration::Value(Structure { field, })"), + Ok(Pattern::EnumerationValue(EnumerationPattern { + type_name, variant_name, argument: Some(subpat), .. + })) if + type_name.as_printed() == "Enumeration" && + variant_name.as_printed() == "Value" && + matches!(subpat.as_ref(), Pattern::Structure(StructurePattern { + type_name, fields, .. + }) if + type_name.as_printed() == "Structure" && + fields.len() == 1 && + matches!(fields.first(), Some((f, None)) if + f.as_printed() == "field")))); + assert!(matches!( + parse_pat("Structure { field: Enumeration::Value, }"), + Ok(Pattern::Structure(StructurePattern { + type_name, fields, .. + })) if + type_name.as_printed() == "Structure" && + fields.len() == 1 && + matches!(fields.first(), Some((f, Some(subpat))) if + f.as_printed() == "field" && + matches!(subpat, Pattern::EnumerationValue(EnumerationPattern { + type_name, variant_name, argument: None, .. 
+ }) if + type_name.as_printed() == "Enumeration" && + variant_name.as_printed() == "Value")))); +} + +#[test] +fn definitions() { + let parse_def = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_definition() + }; + + assert!(matches!( + parse_def("x = 1;"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: None, value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(parse_def("x = 1").is_err()); + assert!(matches!( + parse_def("x: Integer = 1;"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(_), value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("export x: Integer = 1;"), + Ok(Definition { export: ExportClass::Public, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(_), value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("export restrict() x: Integer = 1;"), + Ok(Definition { export: ExportClass::Public, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(_), value, .. }) if + name.as_printed() == "x" && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("export restrict(Numeric a) x: a = 1;"), + Ok(Definition { export: ExportClass::Public, type_restrictions, definition, .. 
}) if + matches!(&type_restrictions, TypeRestrictions { restrictions } if + restrictions.len() == 1 && + matches!(restrictions.first(), Some(TypeRestriction{ constructor, arguments }) if + matches!(constructor, Type::Constructor(_, n) if n.as_printed() == "Numeric") && + matches!(arguments.as_slice(), [Type::Variable(_, n)] if n.as_printed() == "a"))) && + matches!(&definition, Def::Value(ValueDef { name, mtype: Some(t), value, .. }) if + name.as_printed() == "x" && + matches!(t, Type::Variable(_, n) if n.as_printed() == "a") && + matches!(value, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 1)))); + assert!(matches!( + parse_def("function() { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_none() && + body.len() == 1))); + assert!(matches!( + parse_def("function() { 1 }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_none() && + body.len() == 1))); + assert!(matches!( + parse_def("function(): Integer { 1 }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. 
}) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_some() && + body.len() == 1))); +} + +#[test] +fn functions() { + let parse_def = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_definition() + }; + + assert!(matches!( + parse_def("function() { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "function" && + arguments.is_empty() && + return_type.is_none() && + body.len() == 1))); + assert!(matches!( + parse_def("fun(a) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [FunctionArg{ name, arg_type: None }] if + name.as_printed() == "a")))); + assert!(matches!( + parse_def("fun(a,b) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: None }, + FunctionArg{ name: bname, arg_type: None } + ] if + aname.as_printed() == "a" && + bname.as_printed() == "b")))); + assert!(matches!( + parse_def("fun(a,b,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. 
}) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: None }, + FunctionArg{ name: bname, arg_type: None } + ] if + aname.as_printed() == "a" && + bname.as_printed() == "b")))); + assert!(matches!( + parse_def("fun(a:U8,b,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: Some(Type::Application(atype, atype_args)) }, + FunctionArg{ name: bname, arg_type: None } + ] if + aname.as_printed() == "a" && + matches!(atype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + atype_args.is_empty() && + bname.as_printed() == "b")))); + assert!(matches!( + parse_def("fun(a:U8,b:U8,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. }) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: Some(Type::Application(atype, atype_args)) }, + FunctionArg{ name: bname, arg_type: Some(Type::Application(btype, btype_args)) } + ] if + aname.as_printed() == "a" && + matches!(atype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + atype_args.is_empty() && + bname.as_printed() == "b" && + matches!(btype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + btype_args.is_empty())))); + assert!(matches!( + parse_def("fun(a,b:U8,) { }"), + Ok(Definition { export: ExportClass::Private, type_restrictions, definition, .. 
}) if + type_restrictions.is_empty() && + matches!(&definition, Def::Function(FunctionDef { name, arguments, return_type, body, .. }) if + name.as_printed() == "fun" && + return_type.is_none() && + body.len() == 1 && + matches!(arguments.as_slice(), [ + FunctionArg{ name: aname, arg_type: None }, + FunctionArg{ name: bname, arg_type: Some(Type::Application(btype, btype_args)) } + ] if + aname.as_printed() == "a" && + bname.as_printed() == "b" && + matches!(btype.as_ref(), Type::Constructor(_, n) if n.as_printed() == "U8") && + btype_args.is_empty())))); + assert!(parse_def("fun(a,,b,) { }").is_err()); +} + +#[test] +fn definition_types() { + let parse_def = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_definition() + }; + + assert!(matches!( + parse_def("x: prim%U8 = 1;"), + Ok(Definition { definition, .. }) if + matches!(&definition, Def::Value(ValueDef{ mtype, .. }) if + matches!(mtype, Some(Type::Application(f, args)) if + args.is_empty() && + matches!(f.as_ref(), Type::Primitive(_, name) if + name.as_printed() == "U8"))))); + assert!(matches!( + parse_def("x: Stupid Monad prim%U8 = 1;"), + Ok(Definition { definition, .. }) if + matches!(&definition, Def::Value(ValueDef{ mtype, .. }) if + matches!(mtype, Some(Type::Application(f, args)) if + matches!(f.as_ref(), Type::Constructor(_, name) if + name.as_printed() == "Stupid") && + matches!(args.as_slice(), [Type::Constructor(_, cname), Type::Primitive(_, pname)] if + cname.as_printed() == "Monad" && + pname.as_printed() == "U8"))))); + assert!(matches!( + parse_def("x: Stupid (Monad prim%U8) = 1;"), + Ok(Definition { definition, .. }) if + matches!(&definition, Def::Value(ValueDef{ mtype, .. 
}) if + matches!(mtype, Some(Type::Application(f, args)) if + matches!(f.as_ref(), Type::Constructor(_, name) if + name.as_printed() == "Stupid") && + matches!(args.as_slice(), [Type::Application(cname, args2)] if + matches!(cname.as_ref(), Type::Constructor(_, c) if c.as_printed() == "Monad") && + matches!(args2.as_slice(), [Type::Primitive(_, pname)] if + pname.as_printed() == "U8")))))); + assert!(parse_def("x: Stupid (Monad prim%U8 = 1;").is_err()); +} + +#[test] +fn operators() { + let parse = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_module() + }; + + let all_the_operators = r#" +prefix operator - -> negate; +postfix operator ++ -> mutable_add; +infix left operator + -> sum; +infix right operator - -> subtract; +infix operator * at 8 -> multiply; +postfix operator ! at 3 -> factorial; +prefix operator $$ at 1 -> money; +"#; + + assert!(parse(all_the_operators).is_ok()); + + assert!(parse("left prefix operator - -> negate;").is_err()); + assert!(parse("right prefix operator - -> negate;").is_err()); + assert!(parse("right infix operator - -> negate;").is_err()); + assert!(parse("left infix operator - -> negate;").is_err()); + assert!(parse("infix operator at 8 - -> negate;").is_err()); + assert!(parse("infix operator * at 16 -> multiply;").is_err()); + assert!(parse("infix operator * at apple -> multiply;").is_err()); + + // these are designed to replicate the examples in the infix_and_precedence + // tests, but with the precedence set automatically by the parser. + let plus_and_times = |expr| { + format!( + r#" +infix left operator + at 6 -> add; +infix right operator * at 7 -> mul; + +x = {expr}; +"# + ) + }; + + let plus_example = plus_and_times("1 + 2 + 3"); + assert!(matches!( + parse(&plus_example), + Ok(Module { definitions }) if + matches!(definitions.last(), Some(Definition{ definition, .. }) if + matches!(definition, Def::Value(ValueDef{ value, .. 
}) if + matches!(value, Expression::Call(plus, CallKind::Infix, args) if + matches!(plus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(args.as_slice(), [ + Expression::Call(innerplus, CallKind::Infix, inner_args), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v3 == 3 && + matches!(innerplus.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })) + ] if *v1 == 1 && *v2 == 2))))))); + + let times_example = plus_and_times("1 * 2 * 3"); + assert!(matches!( + parse(×_example), + Ok(Module { definitions }) if + matches!(definitions.last(), Some(Definition{ definition, .. }) if + matches!(definition, Def::Value(ValueDef{ value, .. }) if + matches!(value, Expression::Call(times, CallKind::Infix, args) if + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v1, .. })), + Expression::Call(innertimes, CallKind::Infix, inner_args), + ] if *v1 == 1 && + matches!(innertimes.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(inner_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v2, .. })), + Expression::Value(ConstantValue::Integer(_, IntegerWithBase{ value: v3, .. })) + ] if *v2 == 2 && *v3 == 3))))))); + + let mixed_example = plus_and_times("1 + 2 * 3 + 4"); + assert!(matches!( + parse(&mixed_example), + Ok(Module { definitions }) if + matches!(definitions.last(), Some(Definition{ definition, .. }) if + matches!(definition, Def::Value(ValueDef{ value, .. 
}) if + matches!(value, Expression::Call(plus_right, CallKind::Infix, outer_args) if + matches!(plus_right.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(outer_args.as_slice(), [ + Expression::Call(plus_left, CallKind::Infix, left_args), + Expression::Value(ConstantValue::Integer(_, v4)) + ] if + matches!(v4, IntegerWithBase{ value: 4, .. }) && + matches!(plus_left.as_ref(), Expression::Reference(_,n) if n.as_printed() == "+") && + matches!(left_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v1)), + Expression::Call(times, CallKind::Infix, times_args) + ] if + matches!(v1, IntegerWithBase{ value: 1, .. }) && + matches!(times.as_ref(), Expression::Reference(_,n) if n.as_printed() == "*") && + matches!(times_args.as_slice(), [ + Expression::Value(ConstantValue::Integer(_, v2)), + Expression::Value(ConstantValue::Integer(_, v3)) + ] if + matches!(v2, IntegerWithBase{ value: 2, .. }) && + matches!(v3, IntegerWithBase{ value: 3, .. }))))))))); +} + +#[test] +fn pattern_match() { + let parse_ex = |str| { + let lexer = Lexer::from(str); + let mut result = Parser::new("test", lexer); + result.parse_expression() + }; + + assert!(matches!( + parse_ex("match x { }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + cases.is_empty())); + assert!(matches!( + parse_ex("match x { 1 -> 2 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase { pattern, consequent }] if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 1) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)))); + assert!(matches!( + parse_ex("match x { 1 -> 2, }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. 
})) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase { pattern, consequent }] if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 1) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)))); + assert!(matches!( + parse_ex("match x { 1 -> 2, 3 -> 4 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [mcase1, mcase2] if + matches!(mcase1, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 1) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)) && + matches!(mcase2, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::Constant(ConstantValue::Integer(_, iwb)) if + iwb.value == 3) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 4))))); + assert!(matches!( + parse_ex("match x { y -> 2, }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase { pattern, consequent }] if + matches!(pattern, Pattern::Variable(n) if n.as_printed() == "y") && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)))); + assert!(matches!( + parse_ex("match x { Option::None -> 2, Option::Some(x) -> 4 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [mcase1, mcase2] if + matches!(mcase1, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::EnumerationValue(ep) if + matches!(ep, EnumerationPattern{ type_name, variant_name, argument: None, .. 
} if + type_name.as_printed() == "Option" && + variant_name.as_printed() == "None")) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 2)) && + matches!(mcase2, MatchCase { pattern, consequent } if + matches!(pattern, Pattern::EnumerationValue(ep) if + matches!(ep, EnumerationPattern{ type_name, variant_name, argument: Some(subp), .. } if + type_name.as_printed() == "Option" && + variant_name.as_printed() == "Some" && + matches!(subp.as_ref(), Pattern::Variable(n) if n.as_printed() == "x"))) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 4))))); + assert!(matches!( + parse_ex("match x { Foo{ a, b: 2, c: d } -> 6 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase{ pattern, consequent }] if + matches!(pattern, Pattern::Structure(StructurePattern{ type_name, fields, .. }) if + type_name.as_printed() == "Foo" && + matches!(fields.as_slice(), [(field1, None), (field2, Some(pat2)), (field3, Some(pat3))] if + field1.as_printed() == "a" && + field2.as_printed() == "b" && + field3.as_printed() == "c" && + matches!(pat2, Pattern::Constant(ConstantValue::Integer(_, iwb)) if iwb.value == 2) && + matches!(pat3, Pattern::Variable(n) if n.as_printed() == "d"))) && + matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if + iwb.value == 6)))); + assert!(matches!( + parse_ex("match x { Foo{ a, b: 2, c } -> 6 }"), + Ok(Expression::Match(MatchExpr{ value, cases, .. })) if + matches!(value.as_ref(), Expression::Reference(_, n) if n.as_printed() == "x") && + matches!(cases.as_slice(), [MatchCase{ pattern, consequent }] if + matches!(pattern, Pattern::Structure(StructurePattern{ type_name, fields, .. 
}) if
+                type_name.as_printed() == "Foo" &&
+                matches!(fields.as_slice(), [(field1, None), (field2, Some(pat2)), (field3, None)] if
+                    field1.as_printed() == "a" &&
+                    field2.as_printed() == "b" &&
+                    field3.as_printed() == "c" &&
+                    matches!(pat2, Pattern::Constant(ConstantValue::Integer(_, iwb)) if iwb.value == 2))) &&
+            matches!(consequent, Expression::Value(ConstantValue::Integer(_, iwb)) if
+                iwb.value == 6))));
+
+    // Malformed match expressions must be rejected, not parsed loosely:
+    // bare constructor patterns, parenthesised constants, positional fields
+    // in a structure pattern, and various truncated inputs.
+    assert!(parse_ex("match x { Foo -> 3 }").is_err());
+    assert!(parse_ex("match x { (4) -> 3 }").is_err());
+    assert!(parse_ex("match x { Foo{ 3, x } -> 3 }").is_err());
+    assert!(parse_ex("match x { Foo{ x").is_err());
+    assert!(parse_ex("match x { Foo{ x: 3").is_err());
+    assert!(parse_ex("match x { Foo{ x:: 3").is_err());
+    assert!(parse_ex("match x { Foo{ x: 3 4 } -> 4 }").is_err());
+}
diff --git a/src/syntax/print.rs b/src/syntax/print.rs
new file mode 100644
index 0000000..71a0242
--- /dev/null
+++ b/src/syntax/print.rs
@@ -0,0 +1,70 @@
+use crate::syntax::ast::{ConstantValue, Type};
+#[cfg(test)]
+use crate::syntax::parse::Parser;
+#[cfg(test)]
+use crate::syntax::tokens::Lexer;
+use pretty::{DocAllocator, Pretty};
+
+// Pretty-printer for the surface syntax, built on the `pretty` crate's
+// document combinators.
+impl<'a, D: ?Sized + DocAllocator<'a, A>, A: 'a> Pretty<'a, D, A> for Type {
+    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
+        match self {
+            Type::Constructor(_, n) => allocator.as_string(n),
+            Type::Variable(_, n) => allocator.as_string(n),
+            Type::Primitive(_, n) => allocator.text("prim%").append(allocator.as_string(n)),
+
+            // NOTE(review): applications and argument lists are joined with
+            // bare spaces and no parentheses — nested applications would
+            // print ambiguously. TODO confirm against the parser's grammar.
+            Type::Application(c, args) => c
+                .pretty(allocator)
+                .append(allocator.space())
+                .append(allocator.intersperse(args, " ")),
+
+            // NOTE(review): no "->" is emitted between the argument types and
+            // the return type, so this output cannot re-lex as a function
+            // type; presumably why the `types` round-trip property below is
+            // commented out. TODO confirm and fix in the real file.
+            Type::Function(args, ret) => allocator
+                .intersperse(args, " ")
+                .append(allocator.space())
+                .append(ret.pretty(allocator)),
+        }
+    }
+}
+
+impl<'a, D: ?Sized + DocAllocator<'a, A>, A: 'a> Pretty<'a, D, A> for ConstantValue {
+    fn pretty(self, allocator: &'a D) -> pretty::DocBuilder<'a, D, A> {
+        match self {
+            // Strings and characters are printed via Debug so they come out
+            // quoted and escaped, matching the lexer's literal syntax.
+            ConstantValue::String(_, x) => allocator.text(format!("{x:?}")),
+            ConstantValue::Character(_, c) => allocator.text(format!("{c:?}")),
+            ConstantValue::Integer(_, iwb) => match iwb.base {
+                None => allocator.as_string(iwb.value),
+                Some(2) => allocator.text(format!("0b{:b}", iwb.value)),
+                Some(8) => allocator.text(format!("0o{:o}", iwb.value)),
+                Some(10) => allocator.text(format!("0d{}", iwb.value)),
+                Some(16) => allocator.text(format!("0x{:x}", iwb.value)),
+                // Any other base is a constructed-value invariant violation.
+                Some(x) => panic!("Illegal base {x} for integer constant."),
+            },
+        }
+    }
+}
+
+proptest::proptest! {
+    // Property: pretty-printing a constant and re-parsing the rendered text
+    // yields the original value (print/parse round-trip).
+    #[test]
+    fn constants(x: ConstantValue) {
+        let allocator: pretty::Arena = pretty::Arena::new();
+        let docbuilder = x.clone().pretty(&allocator);
+        let mut string_version = String::new();
+        docbuilder.render_fmt(80, &mut string_version).expect("can render to string");
+        let lexer = Lexer::from(string_version.as_str());
+        let mut parser = Parser::new("test", lexer);
+        let roundtripped = parser.parse_constant().expect("can parse constant");
+        proptest::prop_assert_eq!(x, roundtripped);
+    }
+
+// NOTE(review): disabled round-trip property for types — see the missing
+// arrow in `Type::Function`'s printer above. Kept as a TODO rather than
+// deleted.
+// #[test]
+// fn types(x: Type) {
+//     let allocator: pretty::Arena = pretty::Arena::new();
+//     let docbuilder = x.clone().pretty(&allocator);
+//     let mut string_version = String::new();
+//     docbuilder.render_fmt(80, &mut string_version).expect("can render to string");
+//     println!("String version: {string_version:?}");
+//     let lexer = Lexer::from(string_version.as_str());
+//     let mut parser = Parser::new("test", lexer);
+//     let roundtripped = parser.parse_type().expect("can parse constant");
+//     proptest::prop_assert_eq!(x, roundtripped);
+// }
+}
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
new file mode 100644
index 0000000..ad096c3
--- /dev/null
+++ b/src/syntax/tokens.rs
@@ -0,0 +1,882 @@
+use crate::syntax::IntegerWithBase;
+use crate::syntax::error::LexerError;
+use proptest_derive::Arbitrary;
+use std::fmt;
+use std::ops::Range;
+use std::str::CharIndices;
+
+// A token paired with its byte span in the source text.
+#[derive(Clone)]
+pub struct LocatedToken {
+    pub token: Token,
+    pub
span: Range,
+}
+// NOTE(review): `Range` above — and several signatures below that read as
+// bare `Result`, `Option`, or `add_file>` — appear to have lost their
+// generic parameters to an angle-bracket-stripping extraction artifact
+// (`Range<usize>`, `Result<Option<LocatedToken>, LexerError>`, etc.).
+// Confirm against the repository before relying on this listing.
+
+/// A single token of the input stream; used to help the parsing function over
+/// more concrete things than bytes.
+///
+/// The [`std::fmt::Display`] implementation is designed to round-trip, so those
+/// needing a more regular or descriptive option should consider using the
+/// [`std::fmt::Debug`] implementation instead.
+#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)]
+pub enum Token {
+    OpenParen,
+    CloseParen,
+    OpenSquare,
+    CloseSquare,
+    OpenBrace,
+    CloseBrace,
+    Semi,
+    Colon,
+    DoubleColon,
+    Comma,
+    BackTick,
+    Arrow,
+    Lambda(bool),
+
+    // The proptest regexes mirror the lexer's identifier classes so the
+    // round-trip property below exercises only lexable names; "->" is
+    // excluded because it lexes as `Arrow`, not an operator.
+    TypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String),
+    ValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String),
+    OperatorName(
+        #[proptest(
+            regex = r"[\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|][\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|_]*",
+            filter = "|x| x != \"->\""
+        )]
+        String,
+    ),
+
+    PrimitiveTypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String),
+    PrimitiveValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String),
+
+    Integer(IntegerWithBase),
+    Character(char),
+    String(String),
+}
+
+impl fmt::Display for Token {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Token::OpenParen => write!(f, "("),
+            Token::CloseParen => write!(f, ")"),
+            Token::OpenSquare => write!(f, "["),
+            Token::CloseSquare => write!(f, "]"),
+            Token::OpenBrace => write!(f, "{{"),
+            Token::CloseBrace => write!(f, "}}"),
+            Token::Semi => write!(f, ";"),
+            Token::Colon => write!(f, ":"),
+            Token::DoubleColon => write!(f, "::"),
+            Token::Comma => write!(f, ","),
+            Token::BackTick => write!(f, "`"),
+            Token::Arrow => write!(f, "->"),
+            Token::Lambda(false) => write!(f, "\\"),
+            Token::Lambda(true) => write!(f, "λ"),
+            Token::TypeName(str) => write!(f, "{str}"),
+            Token::ValueName(str) => write!(f, "{str}"),
+            Token::OperatorName(str) => write!(f, "{str}"),
+            Token::PrimitiveTypeName(str) => write!(f, "prim%{str}"),
+            Token::PrimitiveValueName(str) => write!(f, "prim%{str}"),
+            Token::Integer(IntegerWithBase { base, value }) => match base {
+                None => write!(f, "{value}"),
+                Some(2) => write!(f, "0b{value:b}"),
+                Some(8) => write!(f, "0o{value:o}"),
+                Some(10) => write!(f, "0d{value}"),
+                Some(16) => write!(f, "0x{value:x}"),
+                // NOTE(review): this arm writes nothing and leaves `base`
+                // unused, breaking the round-trip contract documented on
+                // `Token` and contrasting with print.rs, which panics on an
+                // illegal base. The string literal most likely contained
+                // angle-bracketed text removed by the extraction artifact
+                // noted above — confirm against the repository.
+                Some(base) => write!(f, ""),
+            },
+            Token::Character(c) => write!(f, "{c:?}"),
+            Token::String(s) => write!(f, "{s:?}"),
+        }
+    }
+}
+
+// The lexer is a small state machine: once an error is produced it is
+// repeated on every subsequent `next()`, and `Done` ends the stream.
+#[allow(private_interfaces)]
+pub enum Lexer<'a> {
+    Working(LexerState<'a>),
+    Errored(LexerError),
+    Done,
+}
+
+// Internal cursor: the remaining character stream plus a one-character
+// pushback buffer (see `stash_char`).
+struct LexerState<'a> {
+    stream: CharIndices<'a>,
+    buffer: Option<(usize, char)>,
+}
+
+impl<'a> From<&'a str> for Lexer<'a> {
+    fn from(value: &'a str) -> Self {
+        Lexer::new(value)
+    }
+}
+
+impl<'a> Lexer<'a> {
+    pub fn new(stream: &'a str) -> Self {
+        Lexer::Working(LexerState {
+            stream: stream.char_indices(),
+            buffer: None,
+        })
+    }
+}
+
+impl<'a> Iterator for Lexer<'a> {
+    type Item = Result;
+
+    fn next(&mut self) -> Option {
+        match self {
+            Lexer::Done => None,
+            Lexer::Errored(e) => Some(Err(e.clone())),
+            Lexer::Working(state) => match state.next_token() {
+                Err(e) => {
+                    // NOTE(review): debug print left in library code — the
+                    // error is already returned to the caller via the
+                    // iterator; flag for removal.
+                    println!("ERROR: {e}");
+                    *self = Lexer::Errored(e.clone());
+                    Some(Err(e))
+                }
+
+                Ok(None) => {
+                    *self = Lexer::Done;
+                    None
+                }
+
+                Ok(Some(ltoken)) => Some(Ok(ltoken)),
+            },
+        }
+    }
+}
+
+impl<'a> LexerState<'a> {
+    // Pop the pushback buffer first, then fall through to the stream.
+    fn next_char(&mut self) -> Option<(usize, char)> {
+        self.buffer.take().or_else(|| self.stream.next())
+    }
+
+    // One character of lookahead: put a char back so `next_char` re-yields
+    // it. At most one character may be stashed at a time.
+    fn stash_char(&mut self, idx: usize, c: char) {
+        assert!(self.buffer.is_none());
+        self.buffer = Some((idx, c));
+    }
+
+    // Scan and return the next token, or Ok(None) at end of input.
+    fn next_token(&mut self) -> Result, LexerError> {
+        while let Some((token_start_offset, char)) = self.next_char() {
+            if char.is_whitespace() {
+                continue;
+            }
+
+            // Helper for single-character tokens: span runs from the start
+            // character to the stream's current offset.
+            let simple_response = |token| {
+                Ok(Some(LocatedToken {
+                    token,
+                    span: token_start_offset..self.stream.offset(),
+                }))
+            };
+
+            match char {
+                '(' => return simple_response(Token::OpenParen),
+                ')' => return simple_response(Token::CloseParen),
+                '[' => return
simple_response(Token::OpenSquare),
+                ']' => return simple_response(Token::CloseSquare),
+                '{' => return simple_response(Token::OpenBrace),
+                '}' => return simple_response(Token::CloseBrace),
+                ';' => return simple_response(Token::Semi),
+                ',' => return simple_response(Token::Comma),
+                '`' => return simple_response(Token::BackTick),
+                '\\' => return simple_response(Token::Lambda(false)),
+                'λ' => return simple_response(Token::Lambda(true)),
+
+                // Multi-character starters each get a dedicated scanner.
+                '0' => return self.starts_with_zero(token_start_offset),
+                '\'' => return self.starts_with_single(token_start_offset),
+                '\"' => return self.starts_with_double(token_start_offset),
+                '-' => return self.starts_with_dash(token_start_offset),
+                ':' => return self.starts_with_colon(token_start_offset),
+                _ => {}
+            }
+
+            if let Some(value) = char.to_digit(10) {
+                return self.parse_integer(token_start_offset, 10, None, value as u64);
+            }
+
+            // Identifier classes: uppercase start => type name, other
+            // alphabetic/underscore => value name, remaining printable
+            // non-alphanumerics => operator name.
+            if char.is_uppercase() {
+                return self.parse_identifier(
+                    token_start_offset,
+                    char.into(),
+                    |c| c.is_alphanumeric() || c == '_',
+                    Token::TypeName,
+                );
+            }
+
+            if char.is_alphabetic() || char == '_' {
+                return self.parse_identifier(
+                    token_start_offset,
+                    char.into(),
+                    |c| c.is_alphanumeric() || c == '_',
+                    Token::ValueName,
+                );
+            }
+
+            if !char.is_alphanumeric() && !char.is_whitespace() && !char.is_control() {
+                return self.parse_identifier(
+                    token_start_offset,
+                    char.into(),
+                    |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(),
+                    Token::OperatorName,
+                );
+            }
+        }
+
+        Ok(None)
+    }
+
+    // Dispatch after a leading '0': a base prefix (0b/0o/0d/0x), further
+    // decimal digits, a lone zero at end of input, or zero followed by a
+    // non-digit (which is stashed back).
+    fn starts_with_zero(
+        &mut self,
+        token_start_offset: usize,
+    ) -> Result, LexerError> {
+        match self.next_char() {
+            None => {
+                let token = Token::Integer(IntegerWithBase {
+                    base: None,
+                    value: 0,
+                });
+                Ok(Some(LocatedToken {
+                    token,
+                    span: token_start_offset..self.stream.offset(),
+                }))
+            }
+
+            Some((_, 'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0),
+            Some((_, 'o')) => self.parse_integer(token_start_offset, 8, Some(8), 0),
+            Some((_, 'd')) => self.parse_integer(token_start_offset, 10, Some(10), 0),
+            Some((_, 'x')) => self.parse_integer(token_start_offset, 16, Some(16), 0),
+
+            Some((offset, c)) => {
+                if let Some(value) = c.to_digit(10) {
+                    self.parse_integer(token_start_offset, 10, None, value as u64)
+                } else {
+                    self.stash_char(offset, c);
+                    let token = Token::Integer(IntegerWithBase {
+                        base: None,
+                        value: 0,
+                    });
+                    Ok(Some(LocatedToken {
+                        token,
+                        span: token_start_offset..offset,
+                    }))
+                }
+            }
+        }
+    }
+
+    // Accumulate digits in `base`; the first non-digit is stashed back.
+    // `provided_base` records an explicit 0b/0o/0d/0x prefix for printing.
+    fn parse_integer(
+        &mut self,
+        token_start_offset: usize,
+        base: u32,
+        provided_base: Option,
+        mut value: u64,
+    ) -> Result, LexerError> {
+        let mut end_offset = self.stream.offset();
+
+        while let Some((offset, c)) = self.next_char() {
+            end_offset = offset;
+            if let Some(digit) = c.to_digit(base) {
+                // NOTE(review): no overflow handling — a literal longer than
+                // u64 wraps in release builds and panics in debug builds.
+                // TODO confirm whether an OverflowingLiteral error is wanted.
+                value = (value * (base as u64)) + (digit as u64);
+            } else {
+                self.stash_char(offset, c);
+                break;
+            }
+        }
+
+        let token = Token::Integer(IntegerWithBase {
+            base: provided_base,
+            value,
+        });
+
+        Ok(Some(LocatedToken {
+            token,
+            span: token_start_offset..end_offset,
+        }))
+    }
+
+    // Consume an identifier whose characters satisfy `allowed_character`,
+    // wrapping the result with `builder`. The special "prim" + '%' sequence
+    // rebinds both the character class and the builder mid-scan to produce
+    // PrimitiveTypeName / PrimitiveValueName tokens.
+    fn parse_identifier(
+        &mut self,
+        token_start_offset: usize,
+        mut identifier: String,
+        mut allowed_character: fn(char) -> bool,
+        mut builder: fn(String) -> Token,
+    ) -> Result, LexerError> {
+        let mut end_offset = self.stream.offset();
+
+        while let Some((offset, c)) = self.next_char() {
+            end_offset = offset;
+
+            if allowed_character(c) {
+                identifier.push(c);
+            } else if identifier == "prim" && c == '%' {
+                // Restart the identifier: "prim%" is a prefix, not part of
+                // the name itself.
+                identifier = String::new();
+                allowed_character = |c| c.is_alphanumeric() || c == '_';
+                match self.next_char() {
+                    None => {
+                        return Err(LexerError::IllegalPrimitive {
+                            span: token_start_offset..end_offset,
+                        });
+                    }
+
+                    Some((_, char)) => {
+                        if char.is_uppercase() {
+                            identifier.push(char);
+                            builder = Token::PrimitiveTypeName;
+                        } else if char.is_lowercase() || char == '_' {
+                            identifier.push(char);
+                            builder = Token::PrimitiveValueName;
+                        } else {
+                            return Err(LexerError::IllegalPrimitiveCharacter {
+                                span: token_start_offset..end_offset,
+                                char,
+                            });
+                        }
+                    }
+                }
+            } else {
+                self.stash_char(offset, c);
+                break;
+            }
+        }
+
+        Ok(Some(LocatedToken {
+            token: builder(identifier),
+            span: token_start_offset..end_offset,
+        }))
+    }
+
+    // Character literal: '<char>' with optional backslash escape.
+    fn starts_with_single(
+        &mut self,
+        token_start_offset: usize,
+    ) -> Result, LexerError> {
+        let Some((_, mut char)) = self.next_char() else {
+            return Err(LexerError::UnfinishedCharacter {
+                span: token_start_offset..self.stream.offset(),
+            });
+        };
+
+        if char == '\\' {
+            char = self.get_escaped_character(token_start_offset)?;
+        }
+
+        let Some((idx, finish_char)) = self.next_char() else {
+            return Err(LexerError::UnfinishedCharacter {
+                span: token_start_offset..self.stream.offset(),
+            });
+        };
+
+        if finish_char != '\'' {
+            return Err(LexerError::OverlongCharacter {
+                char,
+                span: token_start_offset..self.stream.offset(),
+            });
+        }
+
+        Ok(Some(LocatedToken {
+            token: Token::Character(char),
+            span: token_start_offset..idx,
+        }))
+    }
+
+    // Decode one backslash escape (C-style set plus \u{...}).
+    fn get_escaped_character(&mut self, token_start_offset: usize) -> Result {
+        let Some((idx, escaped_char)) = self.next_char() else {
+            return Err(LexerError::UnfinishedCharacter {
+                span: token_start_offset..self.stream.offset(),
+            });
+        };
+
+        match escaped_char {
+            '0' => Ok('\0'),
+            'a' => Ok('\u{0007}'),
+            'b' => Ok('\u{0008}'),
+            'f' => Ok('\u{000C}'),
+            'n' => Ok('\n'),
+            'r' => Ok('\r'),
+            't' => Ok('\t'),
+            'u' => self.get_unicode_sequence(idx),
+            'v' => Ok('\u{000B}'),
+            '\'' => Ok('\''),
+            '"' => Ok('"'),
+            '\\' => Ok('\\'),
+            _ => Err(LexerError::UnknownEscapeCharacter {
+                escaped_char,
+                span: idx..self.stream.offset(),
+            }),
+        }
+    }
+
+    // Decode the {XXXX} part of a \u{XXXX} escape into a char.
+    fn get_unicode_sequence(&mut self, token_start_offset: usize) -> Result {
+        let Some((_, char)) = self.next_char() else {
+            return Err(LexerError::InvalidUnicode {
+                span: token_start_offset..self.stream.offset(),
+            });
+        };
+
+        if char != '{' {
+            return Err(LexerError::InvalidUnicode {
+                span: token_start_offset..self.stream.offset(),
+            });
+        }
+
+        let mut value: u32 = 0;
+
+        while let Some((idx, char)) = self.next_char() {
+            if let Some(digit) = char.to_digit(16) {
+                // NOTE(review): u32::checked_shl(4) only fails when the shift
+                // COUNT is >= 32 — it does not detect high bits being shifted
+                // out, so this overflow guard never fires (overlong sequences
+                // silently wrap and are only caught later by from_u32).
+                // `value.checked_mul(16)` would detect it. Suspected bug.
+                if let Some(shifted) = value.checked_shl(4) {
+                    value = shifted + digit;
+                    continue;
+                } else {
+                    return Err(LexerError::InvalidUnicode {
+                        span: token_start_offset..idx,
+                    });
+                }
+            }
+
+            if char == '}' {
+                // from_u32 rejects surrogates and values above U+10FFFF.
+                if let Some(char) = char::from_u32(value) {
+                    return Ok(char);
+                } else {
+                    return Err(LexerError::InvalidUnicode {
+                        span: token_start_offset..idx,
+                    });
+                }
+            }
+
+            return Err(LexerError::InvalidUnicode {
+                span: token_start_offset..self.stream.offset(),
+            });
+        }
+
+        Err(LexerError::InvalidUnicode {
+            span: token_start_offset..self.stream.offset(),
+        })
+    }
+
+    // String literal: consume until the closing quote, decoding escapes.
+    fn starts_with_double(
+        &mut self,
+        token_start_offset: usize,
+    ) -> Result, LexerError> {
+        let mut result = String::new();
+
+        while let Some((idx, char)) = self.next_char() {
+            match char {
+                '"' => {
+                    return Ok(Some(LocatedToken {
+                        token: Token::String(result),
+                        span: token_start_offset..idx,
+                    }));
+                }
+
+                '\\' => result.push(self.get_escaped_character(idx)?),
+
+                _ => result.push(char),
+            }
+        }
+
+        Err(LexerError::UnfinishedString {
+            span: token_start_offset..self.stream.offset(),
+        })
+    }
+
+    // Disambiguate '-': a lone minus operator, the "->" arrow (only when NOT
+    // followed by another operator character), or a longer operator name
+    // beginning with '-' or "->".
+    fn starts_with_dash(
+        &mut self,
+        token_start_offset: usize,
+    ) -> Result, LexerError> {
+        match self.next_char() {
+            None => Ok(Some(LocatedToken {
+                token: Token::OperatorName("-".into()),
+                span: token_start_offset..token_start_offset + 1,
+            })),
+            Some((end, '>')) => {
+                let Some((pbloc, peekaboo)) = self.next_char() else {
+                    return Ok(Some(LocatedToken {
+                        token: Token::Arrow,
+                        span: token_start_offset..end,
+                    }));
+                };
+                let is_operator = !peekaboo.is_alphanumeric()
+                    && !peekaboo.is_whitespace()
+                    && !peekaboo.is_control();
+
+                if is_operator {
+                    self.parse_identifier(
+                        token_start_offset,
+                        format!("->{peekaboo}"),
+                        |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(),
+                        Token::OperatorName,
+                    )
+                } else {
+                    self.stash_char(pbloc, peekaboo);
+
+                    Ok(Some(LocatedToken {
+                        token: Token::Arrow,
+                        span: token_start_offset..end,
+                    }))
+                }
+            }
+            Some((_, c)) if !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control() => self
+                .parse_identifier(
+                    token_start_offset,
+                    format!("-{c}"),
+                    |c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(),
+                    Token::OperatorName,
+                ),
+            Some((idx, c)) => {
+                self.stash_char(idx, c);
+                Ok(Some(LocatedToken {
+                    token: Token::OperatorName("-".into()),
+                    span: token_start_offset..idx,
+                }))
+            }
+        }
+    }
+
+    // Disambiguate ':' from '::'.
+    fn starts_with_colon(
+        &mut self,
+        token_start_offset: usize,
+    ) -> Result, LexerError> {
+        match self.next_char() {
+            None => Ok(Some(LocatedToken {
+                token: Token::Colon,
+                span: token_start_offset..token_start_offset + 1,
+            })),
+
+            Some((pos, ':')) => Ok(Some(LocatedToken {
+                token: Token::DoubleColon,
+                span: token_start_offset..pos,
+            })),
+
+            Some((pos, char)) => {
+                self.stash_char(pos, char);
+                Ok(Some(LocatedToken {
+                    token: Token::Colon,
+                    span: token_start_offset..token_start_offset + 1,
+                }))
+            }
+        }
+    }
+}
+
+proptest::proptest!
{
+    // Round-trip property: Display-printing any token and re-lexing it must
+    // yield the same token and nothing else.
+    #[test]
+    fn token_string_token(token: Token) {
+        println!("Starting from {token:?}");
+        let string = format!("{token}");
+        let mut tokens = Lexer::from(string.as_str());
+        let initial_token = tokens.next()
+            .expect("Can get a token without an error.")
+            .expect("Can get a valid token.")
+            .token;
+
+        proptest::prop_assert_eq!(token, initial_token);
+        proptest::prop_assert!(tokens.next().is_none());
+    }
+}
+
+// Test helper: lex `s` and insist on exactly one valid token.
+#[cfg(test)]
+fn parsed_single_token(s: &str) -> Token {
+    let mut tokens = Lexer::from(s);
+    let result = tokens
+        .next()
+        .unwrap_or_else(|| panic!("Can get at least one token from {s:?}"))
+        .expect("Can get a valid token.")
+        .token;
+
+    assert!(
+        tokens.next().is_none(),
+        "Should only get one token from {s:?}"
+    );
+
+    result
+}
+
+#[test]
+fn numbers_work_as_expected() {
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: None,
+            value: 1
+        }),
+        parsed_single_token("1")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(2),
+            value: 1
+        }),
+        parsed_single_token("0b1")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(8),
+            value: 1
+        }),
+        parsed_single_token("0o1")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(10),
+            value: 1
+        }),
+        parsed_single_token("0d1")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(16),
+            value: 1
+        }),
+        parsed_single_token("0x1")
+    );
+
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: None,
+            value: 10
+        }),
+        parsed_single_token("10")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(2),
+            value: 2
+        }),
+        parsed_single_token("0b10")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(8),
+            value: 8
+        }),
+        parsed_single_token("0o10")
+    );
+    // Leading zeros do not imply a base prefix.
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: None,
+            value: 10
+        }),
+        parsed_single_token("0010")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(10),
+            value: 10
+        }),
+        parsed_single_token("0d10")
+    );
+    assert_eq!(
+        Token::Integer(IntegerWithBase {
+            base: Some(16),
+            value: 16
+        }),
+        parsed_single_token("0x10")
+    );
+}
+
+#[test]
+fn lambda_works() {
+    assert_eq!(Token::Lambda(false), parsed_single_token("\\"));
+    assert_eq!(Token::Lambda(true), parsed_single_token("λ"));
+    // Uppercase lambda is an ordinary type name, not a lambda token.
+    assert_eq!(Token::TypeName("Λ".into()), parsed_single_token("Λ"));
+}
+
+#[test]
+fn types_work_as_expected() {
+    assert_eq!(Token::TypeName("Int".into()), parsed_single_token("Int"));
+    assert_eq!(Token::TypeName("Int8".into()), parsed_single_token("Int8"));
+    assert_eq!(Token::TypeName("Γ".into()), parsed_single_token("Γ"));
+}
+
+#[test]
+fn values_work_as_expected() {
+    assert_eq!(
+        Token::ValueName("alpha".into()),
+        parsed_single_token("alpha")
+    );
+    assert_eq!(Token::ValueName("ɑ".into()), parsed_single_token("ɑ"));
+}
+
+#[test]
+fn primitives() {
+    assert_eq!(
+        Token::PrimitiveValueName("add_u8".into()),
+        parsed_single_token("prim%add_u8"),
+    );
+    assert_eq!(
+        Token::PrimitiveTypeName("U8".into()),
+        parsed_single_token("prim%U8"),
+    );
+    assert!(Lexer::from("prim%").next().unwrap().is_err());
+    assert!(Lexer::from("prim%%").next().unwrap().is_err());
+}
+
+#[test]
+fn operators_work_as_expected() {
+    assert_eq!(Token::OperatorName("-".into()), parsed_single_token("-"));
+    assert_eq!(Token::OperatorName("+".into()), parsed_single_token("+"));
+    assert_eq!(Token::OperatorName("*".into()), parsed_single_token("*"));
+    assert_eq!(Token::OperatorName("/".into()), parsed_single_token("/"));
+    assert_eq!(Token::OperatorName("↣".into()), parsed_single_token("↣"));
+}
+
+#[test]
+fn can_separate_pieces() {
+    let mut lexer = Lexer::from("a-b");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+
+    assert_eq!(Some(Token::ValueName("a".into())), next_token());
+    assert_eq!(Some(Token::OperatorName("-".into())), next_token());
+    assert_eq!(Some(Token::ValueName("b".into())), next_token());
+    assert_eq!(None, next_token());
+
+    let mut lexer = Lexer::from("a--b");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+
+    assert_eq!(Some(Token::ValueName("a".into())), next_token());
+    assert_eq!(Some(Token::OperatorName("--".into())), next_token());
+    assert_eq!(Some(Token::ValueName("b".into())), next_token());
+    assert_eq!(None, next_token());
+
+    let mut lexer = Lexer::from("a - -b");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+
+    assert_eq!(Some(Token::ValueName("a".into())), next_token());
+    assert_eq!(Some(Token::OperatorName("-".into())), next_token());
+    assert_eq!(Some(Token::OperatorName("-".into())), next_token());
+    assert_eq!(Some(Token::ValueName("b".into())), next_token());
+    assert_eq!(None, next_token());
+}
+
+#[test]
+fn arrow_requires_nonop() {
+    let mut lexer = Lexer::from("->");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+    assert_eq!(Some(Token::Arrow), next_token());
+
+    let mut lexer = Lexer::from("->*");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+    assert_eq!(Some(Token::OperatorName("->*".into())), next_token());
+
+    let mut lexer = Lexer::from("->*x");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+    assert_eq!(Some(Token::OperatorName("->*".into())), next_token());
+
+    let mut lexer = Lexer::from("->x");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+    assert_eq!(Some(Token::Arrow), next_token());
+}
+
+#[test]
+fn unicode() {
+    let mut lexer = Lexer::from("'\\u{00BE}'");
+    let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").token);
+    assert_eq!(Some(Token::Character('¾')), next_token());
+
+    // NOTE(review): this overlong sequence is rejected only because
+    // char::from_u32 fails on the wrapped value — see the ineffective
+    // checked_shl guard in get_unicode_sequence.
+    let mut lexer = Lexer::from("'\\u{11111111111111111111111111111}'");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'\\u{00BE'");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'\\u00BE}'");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'\\u");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'\\u{00Z}'");
+    assert!(lexer.next().unwrap().is_err());
+}
+
+#[test]
+fn character_string_errors() {
+    let mut lexer = Lexer::from("'");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'-\\");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("''");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'ab'");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'\\x'");
+    assert!(lexer.next().unwrap().is_err());
+    let mut lexer = Lexer::from("'a'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character('a'),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\0'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character('\0'),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\a'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character(_),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\b'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character(_),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\f'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character(_),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\n'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character(_),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\r'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character(_),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\t'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character(_),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\v'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character(_),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\''");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character('\''),
+            ..
+        }))
+    ));
+    let mut lexer = Lexer::from("'\\\\'");
+    assert!(matches!(
+        lexer.next(),
+        Some(Ok(LocatedToken {
+            token: Token::Character('\\'),
+            ..
+        }))
+    ));
+
+    let mut lexer = Lexer::from("\"foo");
+    assert!(lexer.next().unwrap().is_err());
+}
diff --git a/src/syntax/universe.rs b/src/syntax/universe.rs
new file mode 100644
index 0000000..439250e
--- /dev/null
+++ b/src/syntax/universe.rs
@@ -0,0 +1,44 @@
+use crate::syntax::ast::*;
+use crate::syntax::error::ParserError;
+use crate::syntax::parse::Parser;
+use crate::syntax::tokens::Lexer;
+use memmap2::Mmap;
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+
+// NOTE(review): both HashMap fields lost their generic parameters in this
+// listing, and no visible code ever writes to `files` — confirm its intended
+// role against the repository.
+#[derive(Default)]
+pub struct Universe {
+    pub files: HashMap,
+    pub modules: HashMap,
+}
+
+impl Universe {
+    /// Add a file to this universe.
+    ///
+    /// This may result in other files being loaded on behalf of the file, if
+    /// (for example) the given file has imports.
+    pub fn add_file>(&mut self, file: P) -> Result<(), ParserError> {
+        // Lossy filename is used only for error reporting, not as a map key.
+        let filename = file.as_ref().to_string_lossy().into_owned();
+
+        let file_handle = std::fs::File::open(&file).map_err(|e| ParserError::OpenError {
+            file: filename.clone(),
+            error: e,
+        })?;
+        // SAFETY(review): per memmap2's documentation, behaviour is undefined
+        // if the underlying file is modified while mapped; presumably
+        // acceptable for trusted local source files — confirm this
+        // assumption is documented somewhere in the project.
+        let contents = unsafe { Mmap::map(&file_handle) }.map_err(|e| ParserError::ReadError {
+            file: filename.clone(),
+            error: e,
+        })?;
+        let string_contents =
+            std::str::from_utf8(&contents).map_err(|e| ParserError::Utf8Error {
+                file: filename.clone(),
+                error: e,
+            })?;
+
+        let lexer = Lexer::from(string_contents);
+        let mut parser = Parser::new(&file, lexer);
+        let module = parser.parse_module()?;
+        self.modules.insert(file.as_ref().to_path_buf(), module);
+
+        Ok(())
+    }
+}