Start a Rust implementation (still broken), with gitignore updates.

2025-08-09 13:47:08 -07:00
parent 5a5902af6b
commit a663d8f1fb
10 changed files with 2087 additions and 0 deletions

.gitignore · vendored · 5 lines

@@ -6,3 +6,8 @@
hsrc/Syntax/Lexer.hs
hsrc/Syntax/Parser.hs
bang
# Added by cargo
/target

Cargo.lock · generated, new file · 1044 lines

File diff suppressed because it is too large.

Cargo.toml · new file · 16 lines

@@ -0,0 +1,16 @@
[package]
name = "bang"
version = "0.1.0"
edition = "2024"

[dependencies]
codespan = "0.12.0"
codespan-reporting = "0.12.0"
lalrpop-util = "0.20.2"
logos = "0.15.1"
proptest = "1.7.0"
proptest-derive = "0.6.0"
thiserror = "2.0.12"

[build-dependencies]
lalrpop = "0.20.2"

build.rs · new file · 5 lines

@@ -0,0 +1,5 @@
fn main() {
// Generate src/syntax/parser.rs from src/syntax/parser.lalrpop at build time.
lalrpop::process_root().unwrap();
}

src/bin/bangc.rs · new file · 1 line

@@ -0,0 +1 @@
fn main() {}

src/lib.rs · new file · 1 line

@@ -0,0 +1 @@
pub mod syntax;

src/syntax.rs · new file · 251 lines

@@ -0,0 +1,251 @@
use lalrpop_util::lalrpop_mod;
mod error;
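// Splice in the parser module that build.rs generates from
// src/syntax/parser.lalrpop.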
lalrpop_mod!(
#[allow(clippy::just_underscores_and_digits, clippy::clone_on_copy)]
parser,
"/syntax/parser.rs"
);
pub mod tokens;
#[cfg(test)]
use crate::syntax::error::ParserError;
#[cfg(test)]
use crate::syntax::parser::*;
#[cfg(test)]
use crate::syntax::tokens::Lexer;
use codespan_reporting::diagnostic::Label;
use proptest_derive::Arbitrary;
use std::cmp::{max, min};
use std::fmt::Debug;
use std::ops::Range;
#[derive(Debug)]
pub struct Location {
file_id: usize,
span: Range<usize>,
}
impl Location {
pub fn new(file_id: usize, span: Range<usize>) -> Self {
Location { file_id, span }
}
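/// Merge two locations in the same file into a single span covering both.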
pub fn extend_to(&self, other: &Location) -> Location {
assert_eq!(self.file_id, other.file_id);
Location {
file_id: self.file_id,
span: min(self.span.start, other.span.start)..max(self.span.end, other.span.end),
}
}
pub fn primary_label(&self) -> Label<usize> {
Label::primary(self.file_id, self.span.clone())
}
pub fn secondary_label(&self) -> Label<usize> {
Label::secondary(self.file_id, self.span.clone())
}
}
pub struct Module {
definitions: Vec<Definition>,
}
pub struct Definition {
location: Location,
export: ExportClass,
type_restrictions: TypeRestrictions,
definition: Def,
}
pub enum Def {
Enumeration(EnumerationDef),
Structure(StructureDef),
Function(FunctionDef),
Value(ValueDef),
}
impl Def {
fn location(&self) -> &Location {
match self {
Def::Enumeration(def) => &def.location,
Def::Structure(def) => &def.location,
Def::Function(def) => &def.location,
Def::Value(def) => &def.location,
}
}
}
pub struct EnumerationDef {
location: Location,
options: Vec<EnumerationVariant>,
}
pub struct EnumerationVariant {
location: Location,
name: String,
arguments: Vec<Type>,
}
pub struct StructureDef {
name: String,
location: Location,
fields: Vec<StructureField>,
}
pub struct StructureField {
name: String,
field_type: Type,
}
pub struct FunctionDef {
name: String,
location: Location,
arguments: Vec<FunctionArg>,
return_type: Option<Type>,
body: Vec<Statement>,
}
pub struct FunctionArg {
name: String,
arg_type: Option<Type>,
}
pub struct ValueDef {
name: String,
location: Location,
value: Value,
}
pub enum ExportClass {
Public,
Private,
}
pub enum Statement {
Binding(BindingStmt),
}
pub struct BindingStmt {
location: Location,
mutable: bool,
variable: String,
value: Expression,
}
pub enum Expression {
Value(Value),
}
pub struct TypeRestrictions {
restrictions: Vec<TypeRestriction>,
}
impl TypeRestrictions {
fn empty() -> Self {
TypeRestrictions {
restrictions: vec![],
}
}
}
pub struct TypeRestriction {
location: Location,
class: String,
variables: Vec<String>,
}
pub enum Type {
Constructor(Location, String),
Variable(Location, String),
Primitive(Location, String),
Application(Box<Type>, Vec<Type>),
Function(Vec<Type>, Box<Type>),
}
pub enum Value {
Constant(ConstantValue),
}
pub enum ConstantValue {
Integer(Location, IntegerWithBase),
Character(Location, char),
String(Location, String),
}
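/// An integer literal together with the base marker it was written with
/// (`None` for a bare decimal literal); the strategy below keeps proptest to
/// the bases the lexer can actually emit.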
#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)]
pub struct IntegerWithBase {
#[proptest(strategy = "proptest::prop_oneof![ \
proptest::strategy::Just(None), \
proptest::strategy::Just(Some(2)), \
proptest::strategy::Just(Some(8)), \
proptest::strategy::Just(Some(10)), \
proptest::strategy::Just(Some(16)), \
]")]
base: Option<u8>,
value: u64,
}
#[test]
fn can_parse_constants() {
let parse_constant = |input| {
let lexer = Lexer::from(input).map(|item| {
item.map_err(|e| ParserError::LexerError {
file_id: 0,
error: e,
})
});
ConstantValueParser::new().parse(0, lexer)
};
assert!(matches!(
parse_constant("16"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: None,
value: 16,
}
))
));
assert!(matches!(
parse_constant("0x10"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: Some(16),
value: 16,
}
))
));
assert!(matches!(
parse_constant("0o20"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: Some(8),
value: 16,
}
))
));
assert!(matches!(
parse_constant("0b10000"),
Ok(ConstantValue::Integer(
_,
IntegerWithBase {
base: Some(2),
value: 16,
}
))
));
assert!(
matches!(parse_constant("\"foo\""), Ok(ConstantValue::String(_, x))
if x == "foo")
);
assert!(matches!(
parse_constant("'f'"),
Ok(ConstantValue::Character(_, 'f'))
));
}

src/syntax/error.rs · new file · 116 lines

@@ -0,0 +1,116 @@
//use codespan_reporting::diagnostic::{Diagnostic, Label};
use crate::syntax::tokens::Token;
use std::ops::Range;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum ParserError {
#[error("Lexer error at {file_id}: {error}")]
LexerError { file_id: usize, error: LexerError },
}
#[derive(Clone, Debug, Error, PartialEq)]
pub enum LexerError {
#[error("Illegal control character in input stream at offset {offset}")]
IllegalControlCharacter { offset: usize },
#[error("Illegal primitive value/type; it cut off before we could determine which at {span:?}")]
IllegalPrimitive { span: Range<usize> },
#[error("Illegal character in primitive ({char:?}) at {span:?}")]
IllegalPrimitiveCharacter { span: Range<usize>, char: char },
#[error("Unfinished character constant found at {span:?}")]
UnfinishedCharacter { span: Range<usize> },
#[error("Unfinished string constant found at {span:?}")]
UnfinishedString { span: Range<usize> },
#[error("Character {char:?} has some extra bits at the end at {span:?}")]
OverlongCharacter { char: char, span: Range<usize> },
#[error("Unknown escaped character {escaped_char:?} at {span:?}")]
UnknownEscapeCharacter {
escaped_char: char,
span: Range<usize>,
},
#[error("Invalid unicode escape sequence at {span:?}")]
InvalidUnicode { span: Range<usize> },
}
impl LexerError {
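/// Flatten this error into the `(start, Err(error), end)` triple shape the
/// LALRPOP token-stream interface expects.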
pub fn to_triple(&self) -> (usize, Result<Token, LexerError>, usize) {
let (start, end) = match self {
LexerError::IllegalControlCharacter { offset } => (*offset, *offset),
LexerError::IllegalPrimitive { span }
| LexerError::IllegalPrimitiveCharacter { span, .. }
| LexerError::UnfinishedCharacter { span }
| LexerError::UnfinishedString { span }
| LexerError::OverlongCharacter { span, .. }
| LexerError::UnknownEscapeCharacter { span, .. }
| LexerError::InvalidUnicode { span } => (span.start, span.end),
};
(start, Err(self.clone()), end)
}
}
//impl<F> From<LexerError> for Diagnostic<F> {
// fn from(value: LexerError) -> Self {
// match value {
// LexerError::IllegalControlCharacter { file, offset } => Diagnostic::error()
// .with_code("E1001")
// .with_message("Illegal control character in input stream")
// .with_label(Label::primary(file, offset..offset).with_message("illegal character")),
//
// LexerError::IllegalPrimitive { file, span } => Diagnostic::error()
// .with_code("E1002")
// .with_message("Illegal primitive; it cut off before it could finish")
// .with_label(
// Label::primary(file, span)
// .with_message("should be at least one character after the %"),
// ),
//
// LexerError::IllegalPrimitiveCharacter { file, span, char } => Diagnostic::error()
// .with_code("E1003")
// .with_message(format!("Illegal character {char:?} in primitive"))
// .with_label(Label::primary(file, span).with_message("illegal character")),
//
// LexerError::UnfinishedCharacter { file, span } => Diagnostic::error()
// .with_code("E1004")
// .with_message("Unfinished character in input stream.")
// .with_label(Label::primary(file, span).with_message("unfinished character")),
//
// LexerError::UnfinishedString { file, span } => Diagnostic::error()
// .with_code("E1005")
// .with_message("Unfinished string in input stream.")
// .with_label(Label::primary(file, span).with_message("unfinished string")),
//
// LexerError::OverlongCharacter { file, char, span } => Diagnostic::error()
// .with_code("E1006")
// .with_message(format!(
// "Character {char:?} has some extra bits at the end of it."
// ))
// .with_label(Label::primary(file, span).with_message("overlong character")),
//
// LexerError::UnknownEscapeCharacter {
// file,
// escaped_char,
// span,
// } => Diagnostic::error()
// .with_code("E1007")
// .with_message(format!("Unknown escape character {escaped_char:?}."))
// .with_label(Label::primary(file, span).with_message("unknown character")),
//
// LexerError::InvalidUnicode { file, span } => Diagnostic::error()
// .with_code("E1008")
// .with_message("Unknown or invalid unicode escape sequence.")
// .with_label(Label::primary(file, span).with_message("escape sequence")),
// }
// }
//}

src/syntax/parser.lalrpop · new file · 39 lines

@@ -0,0 +1,39 @@
use crate::syntax::*;
use crate::syntax::error::ParserError;
use crate::syntax::tokens::*;
grammar(file_id: usize);
extern {
type Location = usize;
type Error = ParserError;
enum Token {
"(" => Token::OpenParen,
")" => Token::CloseParen,
"[" => Token::OpenSquare,
"]" => Token::CloseSquare,
"{" => Token::OpenBrace,
"}" => Token::CloseBrace,
";" => Token::Semi,
":" => Token::Colon,
"," => Token::Comma,
"`" => Token::BackTick,
"\\" => Token::Lambda(_),
"<constructor>" => Token::TypeName(<String>),
"<value>" => Token::ValueName(<String>),
"<op>" => Token::OperatorName(<String>),
"<prim_constructor>" => Token::PrimitiveTypeName(<String>),
"<prim_value>" => Token::PrimitiveValueName(<String>),
"<integer>" => Token::Integer(<IntegerWithBase>),
"<char>" => Token::Character(<char>),
"<string>" => Token::String(<String>),
}
}
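// Spans are captured with @L/@R markers and wrapped into a Location so
// diagnostics can point back into the source file.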
pub ConstantValue: ConstantValue = {
<s:@L> <x:"<integer>"> <e:@R> => ConstantValue::Integer(Location::new(file_id, s..e), x),
<s:@L> <x:"<char>"> <e:@R> => ConstantValue::Character(Location::new(file_id, s..e), x),
<s:@L> <x:"<string>"> <e:@R> => ConstantValue::String(Location::new(file_id, s..e), x),
}

src/syntax/tokens.rs · new file · 609 lines

@@ -0,0 +1,609 @@
use crate::syntax::IntegerWithBase;
use crate::syntax::error::LexerError;
use proptest_derive::Arbitrary;
use std::fmt;
use std::str::CharIndices;
/// A single token of the input stream; used to help the parsing function over
/// more concrete things than bytes.
///
/// The [`std::fmt::Display`] implementation is designed to round-trip, so those
/// needing a more regular or descriptive option should consider using the
/// [`std::fmt::Debug`] implementation instead.
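///
/// # Examples
///
/// A sketch of the Display side of that round-trip:
///
/// ```
/// use bang::syntax::tokens::Token;
/// assert_eq!(Token::OpenParen.to_string(), "(");
/// assert_eq!(Token::Lambda(true).to_string(), "λ");
/// ```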
#[derive(Clone, Debug, PartialEq, Eq, Arbitrary)]
pub enum Token {
OpenParen,
CloseParen,
OpenSquare,
CloseSquare,
OpenBrace,
CloseBrace,
Semi,
Colon,
Comma,
BackTick,
Lambda(bool),
TypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String),
ValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String),
OperatorName(
#[proptest(
regex = r"[\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|][\~\!\@\#\$\%\^\&\*\+\-\=\.<>\?\|_]*"
)]
String,
),
PrimitiveTypeName(#[proptest(regex = r"[A-Z][a-zA-Z0-9_]*")] String),
PrimitiveValueName(#[proptest(regex = r"[a-z_][a-zA-Z0-9_]*")] String),
Integer(IntegerWithBase),
Character(char),
String(String),
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Token::OpenParen => write!(f, "("),
Token::CloseParen => write!(f, ")"),
Token::OpenSquare => write!(f, "["),
Token::CloseSquare => write!(f, "]"),
Token::OpenBrace => write!(f, "{{"),
Token::CloseBrace => write!(f, "}}"),
Token::Semi => write!(f, ";"),
Token::Colon => write!(f, ":"),
Token::Comma => write!(f, ","),
Token::BackTick => write!(f, "`"),
Token::Lambda(false) => write!(f, "\\"),
Token::Lambda(true) => write!(f, "λ"),
Token::TypeName(str) => write!(f, "{str}"),
Token::ValueName(str) => write!(f, "{str}"),
Token::OperatorName(str) => write!(f, "{str}"),
Token::PrimitiveTypeName(str) => write!(f, "prim%{str}"),
Token::PrimitiveValueName(str) => write!(f, "prim%{str}"),
Token::Integer(IntegerWithBase { base, value }) => match base {
None => write!(f, "{value}"),
Some(2) => write!(f, "0b{value:b}"),
Some(8) => write!(f, "0o{value:o}"),
Some(10) => write!(f, "0d{value}"),
Some(16) => write!(f, "0x{value:x}"),
Some(base) => write!(f, "<illegal number token base={base} value={value}>"),
},
Token::Character(c) => write!(f, "{c:?}"),
Token::String(s) => write!(f, "{s:?}"),
}
}
}
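/// Streaming lexer over a source string: `Working` yields `(start, token,
/// end)` triples, `Errored` repeats its error, and `Done` remembers the
/// offset where input ended.
///
/// A minimal usage sketch:
///
/// ```
/// use bang::syntax::tokens::{Lexer, Token};
/// let mut lexer = Lexer::from("a");
/// assert!(matches!(lexer.next(), Some(Ok((_, Token::ValueName(_), _)))));
/// ```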
#[allow(private_interfaces)]
pub enum Lexer<'a> {
Working(LexerState<'a>),
Errored(LexerError),
Done(usize),
}
struct LexerState<'a> {
stream: CharIndices<'a>,
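/// One-character pushback: the character (and offset) read past the end of
/// the current token.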
buffer: Option<(usize, char)>,
}
impl<'a> From<&'a str> for Lexer<'a> {
fn from(value: &'a str) -> Self {
println!("LEXING '{value}'");
Lexer::new(value)
}
}
impl<'a> Lexer<'a> {
pub fn new(stream: &'a str) -> Self {
Lexer::Working(LexerState {
stream: stream.char_indices(),
buffer: None,
})
}
}
impl<'a> Iterator for Lexer<'a> {
type Item = Result<(usize, Token, usize), LexerError>;
fn next(&mut self) -> Option<Self::Item> {
match self {
Lexer::Done(_) => None,
Lexer::Errored(e) => Some(Err(e.clone())),
Lexer::Working(state) => match state.next_token() {
Err(e) => {
println!("ERROR: {e}");
*self = Lexer::Errored(e.clone());
Some(Err(e))
}
Ok(None) => {
println!("LEXER DONE");
*self = Lexer::Done(state.stream.offset());
None
}
Ok(Some((start, token, end))) => {
println!("TOKEN: {:?}", token);
Some(Ok((start, token, end)))
}
},
}
}
}
impl<'a> LexerState<'a> {
fn next_char(&mut self) -> Option<(usize, char)> {
let result = self.buffer.take().or_else(|| self.stream.next());
println!("next_char() -> {result:?}");
result
}
fn stash_char(&mut self, idx: usize, c: char) {
println!("stash_char({idx}, {c})");
assert!(self.buffer.is_none());
self.buffer = Some((idx, c));
}
fn next_token(&mut self) -> Result<Option<(usize, Token, usize)>, LexerError> {
while let Some((token_start_offset, char)) = self.next_char() {
if char.is_whitespace() {
continue;
}
let simple_response =
|token| Ok(Some((token_start_offset, token, self.stream.offset())));
match char {
'(' => return simple_response(Token::OpenParen),
')' => return simple_response(Token::CloseParen),
'[' => return simple_response(Token::OpenSquare),
']' => return simple_response(Token::CloseSquare),
'{' => return simple_response(Token::OpenBrace),
'}' => return simple_response(Token::CloseBrace),
';' => return simple_response(Token::Semi),
':' => return simple_response(Token::Colon),
',' => return simple_response(Token::Comma),
'`' => return simple_response(Token::BackTick),
'\\' => return simple_response(Token::Lambda(false)),
'λ' => return simple_response(Token::Lambda(true)),
'0' => return self.starts_with_zero(token_start_offset),
'\'' => return self.starts_with_single(token_start_offset),
'\"' => return self.starts_with_double(token_start_offset),
_ => {}
}
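// Fall-through classification: decimal digits start an integer; uppercase
// letters start a type name; other letters and `_` start a value name; any
// remaining printable character begins an operator run.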
if let Some(value) = char.to_digit(10) {
return self.parse_integer(token_start_offset, 10, None, value as u64);
}
if char.is_uppercase() {
return self.parse_identifier(
token_start_offset,
char.into(),
|c| c.is_alphanumeric() || c == '_',
Token::TypeName,
);
}
if char.is_alphabetic() || char == '_' {
return self.parse_identifier(
token_start_offset,
char.into(),
|c| c.is_alphanumeric() || c == '_',
Token::ValueName,
);
}
if !char.is_alphanumeric() && !char.is_whitespace() && !char.is_control() {
return self.parse_identifier(
token_start_offset,
char.into(),
|c| !c.is_alphanumeric() && !c.is_whitespace() && !c.is_control(),
Token::OperatorName,
);
}
}
Ok(None)
}
fn starts_with_zero(
&mut self,
token_start_offset: usize,
) -> Result<Option<(usize, Token, usize)>, LexerError> {
match self.next_char() {
None => {
let token = Token::Integer(IntegerWithBase {
base: None,
value: 0,
});
Ok(Some((token_start_offset, token, self.stream.offset())))
}
Some((_, 'b')) => self.parse_integer(token_start_offset, 2, Some(2), 0),
Some((_, 'o')) => self.parse_integer(token_start_offset, 8, Some(8), 0),
Some((_, 'd')) => self.parse_integer(token_start_offset, 10, Some(10), 0),
Some((_, 'x')) => self.parse_integer(token_start_offset, 16, Some(16), 0),
Some((offset, c)) => {
if let Some(value) = c.to_digit(10) {
self.parse_integer(token_start_offset, 10, None, value as u64)
} else {
self.stash_char(offset, c);
let token = Token::Integer(IntegerWithBase {
base: None,
value: 0,
});
Ok(Some((token_start_offset, token, offset)))
}
}
}
}
fn parse_integer(
&mut self,
token_start_offset: usize,
base: u32,
provided_base: Option<u8>,
mut value: u64,
) -> Result<Option<(usize, Token, usize)>, LexerError> {
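// NOTE: no overflow handling yet; a sufficiently long literal will overflow
// the u64 accumulator.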
let mut end_offset = self.stream.offset();
while let Some((offset, c)) = self.next_char() {
end_offset = offset;
if let Some(digit) = c.to_digit(base) {
value = (value * (base as u64)) + (digit as u64);
} else {
self.stash_char(offset, c);
break;
}
}
let token = Token::Integer(IntegerWithBase {
base: provided_base,
value,
});
Ok(Some((token_start_offset, token, end_offset)))
}
fn parse_identifier(
&mut self,
token_start_offset: usize,
mut identifier: String,
mut allowed_character: fn(char) -> bool,
mut builder: fn(String) -> Token,
) -> Result<Option<(usize, Token, usize)>, LexerError> {
let mut end_offset = self.stream.offset();
while let Some((offset, c)) = self.next_char() {
end_offset = offset;
if allowed_character(c) {
identifier.push(c);
} else if identifier == "prim" && c == '%' {
identifier = String::new();
allowed_character = |c| c.is_alphanumeric() || c == '_';
match self.next_char() {
None => {
return Err(LexerError::IllegalPrimitive {
span: token_start_offset..end_offset,
});
}
Some((_, char)) => {
if char.is_uppercase() {
identifier.push(char);
builder = Token::PrimitiveTypeName;
} else if char.is_lowercase() || char == '_' {
identifier.push(char);
builder = Token::PrimitiveValueName;
} else {
return Err(LexerError::IllegalPrimitiveCharacter {
span: token_start_offset..end_offset,
char,
});
}
}
}
} else {
self.stash_char(offset, c);
break;
}
}
Ok(Some((token_start_offset, builder(identifier), end_offset)))
}
fn starts_with_single(
&mut self,
token_start_offset: usize,
) -> Result<Option<(usize, Token, usize)>, LexerError> {
let Some((_, mut char)) = self.next_char() else {
return Err(LexerError::UnfinishedCharacter {
span: token_start_offset..self.stream.offset(),
});
};
if char == '\\' {
char = self.get_escaped_character(token_start_offset)?;
}
let Some((idx, finish_char)) = self.next_char() else {
return Err(LexerError::UnfinishedCharacter {
span: token_start_offset..self.stream.offset(),
});
};
if finish_char != '\'' {
return Err(LexerError::OverlongCharacter {
char,
span: token_start_offset..self.stream.offset(),
});
}
Ok(Some((token_start_offset, Token::Character(char), idx)))
}
fn get_escaped_character(&mut self, token_start_offset: usize) -> Result<char, LexerError> {
let Some((idx, escaped_char)) = self.next_char() else {
return Err(LexerError::UnfinishedCharacter {
span: token_start_offset..self.stream.offset(),
});
};
match escaped_char {
'0' => Ok('\0'),
'a' => Ok('\u{0007}'),
'b' => Ok('\u{0008}'),
'f' => Ok('\u{000C}'),
'n' => Ok('\n'),
'r' => Ok('\r'),
't' => Ok('\t'),
'u' => self.get_unicode_sequence(idx),
'v' => Ok('\u{000B}'),
'\'' => Ok('\''),
'"' => Ok('"'),
'\\' => Ok('\\'),
_ => Err(LexerError::UnknownEscapeCharacter {
escaped_char,
span: idx..self.stream.offset(),
}),
}
}
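/// Parse the `{XXXX}` tail of a `\u{...}` escape: an opening brace, hex
/// digits, and a closing brace naming a valid Unicode scalar value.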
fn get_unicode_sequence(&mut self, token_start_offset: usize) -> Result<char, LexerError> {
let Some((_, char)) = self.next_char() else {
return Err(LexerError::InvalidUnicode {
span: token_start_offset..self.stream.offset(),
});
};
if char != '{' {
return Err(LexerError::InvalidUnicode {
span: token_start_offset..self.stream.offset(),
});
}
let mut value = 0;
while let Some((idx, char)) = self.next_char() {
if let Some(digit) = char.to_digit(16) {
value = (value * 16) + digit;
continue;
}
if char == '}' {
if let Some(char) = char::from_u32(value) {
return Ok(char);
} else {
return Err(LexerError::InvalidUnicode {
span: token_start_offset..idx,
});
}
}
return Err(LexerError::InvalidUnicode {
span: token_start_offset..self.stream.offset(),
});
}
Err(LexerError::InvalidUnicode {
span: token_start_offset..self.stream.offset(),
})
}
fn starts_with_double(
&mut self,
token_start_offset: usize,
) -> Result<Option<(usize, Token, usize)>, LexerError> {
let mut result = String::new();
while let Some((idx, char)) = self.next_char() {
match char {
'"' => return Ok(Some((token_start_offset, Token::String(result), idx))),
'\\' => result.push(self.get_escaped_character(idx)?),
_ => result.push(char),
}
}
Err(LexerError::UnfinishedString {
span: token_start_offset..self.stream.offset(),
})
}
}
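// Property: pretty-printing any token via Display and re-lexing the result
// yields the same token back, with nothing left over.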
proptest::proptest! {
#[test]
fn token_string_token(token: Token) {
println!("Starting from {token:?}");
let string = format!("{token}");
let mut tokens = Lexer::from(string.as_str());
let initial_token = tokens.next()
.expect("Can get a token without an error.")
.expect("Can get a valid token.")
.1;
proptest::prop_assert_eq!(token, initial_token);
proptest::prop_assert!(tokens.next().is_none());
}
}
#[cfg(test)]
fn parsed_single_token(s: &str) -> Token {
let mut tokens = Lexer::from(s);
let result = tokens
.next()
.unwrap_or_else(|| panic!("Can get at least one token from {s:?}"))
.expect("Can get a valid token.")
.1;
assert!(
tokens.next().is_none(),
"Should only get one token from {s:?}"
);
result
}
#[test]
fn numbers_work_as_expected() {
assert_eq!(
Token::Integer(IntegerWithBase {
base: None,
value: 1
}),
parsed_single_token("1")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(2),
value: 1
}),
parsed_single_token("0b1")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(8),
value: 1
}),
parsed_single_token("0o1")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(10),
value: 1
}),
parsed_single_token("0d1")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(16),
value: 1
}),
parsed_single_token("0x1")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: None,
value: 10
}),
parsed_single_token("10")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(2),
value: 2
}),
parsed_single_token("0b10")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(8),
value: 8
}),
parsed_single_token("0o10")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(10),
value: 10
}),
parsed_single_token("0d10")
);
assert_eq!(
Token::Integer(IntegerWithBase {
base: Some(16),
value: 16
}),
parsed_single_token("0x10")
);
}
#[test]
fn lambda_works() {
assert_eq!(Token::Lambda(false), parsed_single_token("\\"));
assert_eq!(Token::Lambda(true), parsed_single_token("λ"));
assert_eq!(Token::TypeName("Λ".into()), parsed_single_token("Λ"));
}
#[test]
fn types_work_as_expected() {
assert_eq!(Token::TypeName("Int".into()), parsed_single_token("Int"));
assert_eq!(Token::TypeName("Int8".into()), parsed_single_token("Int8"));
assert_eq!(Token::TypeName("Γ".into()), parsed_single_token("Γ"));
}
#[test]
fn values_work_as_expected() {
assert_eq!(
Token::ValueName("alpha".into()),
parsed_single_token("alpha")
);
assert_eq!(Token::ValueName("ɑ".into()), parsed_single_token("ɑ"));
}
#[test]
fn operators_work_as_expected() {
assert_eq!(Token::OperatorName("-".into()), parsed_single_token("-"));
assert_eq!(Token::OperatorName("+".into()), parsed_single_token("+"));
assert_eq!(Token::OperatorName("*".into()), parsed_single_token("*"));
assert_eq!(Token::OperatorName("/".into()), parsed_single_token("/"));
assert_eq!(Token::OperatorName("".into()), parsed_single_token(""));
}
#[test]
fn can_separate_pieces() {
let mut lexer = Lexer::from("a-b");
let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1);
assert_eq!(Some(Token::ValueName("a".into())), next_token());
assert_eq!(Some(Token::OperatorName("-".into())), next_token());
assert_eq!(Some(Token::ValueName("b".into())), next_token());
assert_eq!(None, next_token());
let mut lexer = Lexer::from("a--b");
let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1);
assert_eq!(Some(Token::ValueName("a".into())), next_token());
assert_eq!(Some(Token::OperatorName("--".into())), next_token());
assert_eq!(Some(Token::ValueName("b".into())), next_token());
assert_eq!(None, next_token());
let mut lexer = Lexer::from("a - -b");
let mut next_token = move || lexer.next().map(|x| x.expect("Can read valid token").1);
assert_eq!(Some(Token::ValueName("a".into())), next_token());
assert_eq!(Some(Token::OperatorName("-".into())), next_token());
assert_eq!(Some(Token::OperatorName("-".into())), next_token());
assert_eq!(Some(Token::ValueName("b".into())), next_token());
assert_eq!(None, next_token());
}