121 lines
4.3 KiB
Plaintext
121 lines
4.3 KiB
Plaintext
-- -*- mode: haskell -*-
|
|
-- vi: set ft=haskell :
|
|
{
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# OPTIONS -w #-}
|
|
module Bang.Syntax.Lexer(lexer)
|
|
where
|
|
|
|
import Bang.Syntax.Location
|
|
import Bang.Syntax.Name
|
|
import Bang.Syntax.Token
|
|
import Data.Char(isSpace, isAscii, ord)
|
|
import Data.Int(Int64)
|
|
import Data.Maybe(fromMaybe)
|
|
import Data.Text.Lazy(Text)
|
|
import qualified Data.Text.Lazy as T
|
|
import Data.Word(Word8)
|
|
|
|
}
|
|
|
|
-- Digits
|
|
$decdigit = 0-9
|
|
$hexdigit = [0-9a-fA-f]
|
|
$octdigit = 0-7
|
|
$bindigit = [01]
|
|
|
|
-- Identifier Characters
|
|
$typestart = [A-Z\_]
|
|
$valstart = [a-z\_]
|
|
$identrest = [a-zA-Z0-9\_\.]
|
|
$opident = [\~\!\@\#\$\%\^\&\*\+\-\=\.\<\>\?\_\|:]
|
|
$escape_char = [abfnrtv'\"\\]
|
|
|
|
:-
|
|
|
|
-- Whitespace
|
|
$white+ ;
|
|
"/*"[.\n]*"*/" ;
|
|
|
|
-- Numbers
|
|
$decdigit+ { emitI 0 (IntTok 10) }
|
|
"0x"$hexdigit+ { emitI 2 (IntTok 16) }
|
|
"0o"$octdigit+ { emitI 2 (IntTok 8) }
|
|
"0b"$bindigit+ { emitI 2 (IntTok 2) }
|
|
$decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS FloatTok}
|
|
$decdigit+"e""-"?$decdigit+ { emitS FloatTok}
|
|
|
|
-- Identifier
|
|
$typestart $identrest* { emitS TypeIdent }
|
|
$valstart $identrest* { emitS ValIdent }
|
|
$opident+ { emitS (OpIdent (LeftAssoc 9)) }
|
|
|
|
-- Characters and Strings
|
|
['].['] { emitS CharTok }
|
|
['] [\\] $escape_char ['] { emitS CharTok }
|
|
[\"] ([^\"] | [\n] | ([\\] $escape_char))* [\"] { emitS StringTok }
|
|
|
|
-- Symbols
|
|
"(" { emitT "(" }
|
|
")" { emitT ")" }
|
|
"[" { emitT "[" }
|
|
"]" { emitT "]" }
|
|
"{" { emitT "{" }
|
|
"}" { emitT "}" }
|
|
";" { emitT ";" }
|
|
"," { emitT "," }
|
|
"`" { emitT "`" }
|
|
[\\] { emitT "\\" }
|
|
|
|
{
|
|
|
|
lexer :: Origin -> Maybe Position -> Text -> [Located Token]
|
|
lexer src mbPos txt = go (AlexInput startPos txt)
|
|
where
|
|
startPos = fromMaybe initialPosition mbPos
|
|
go input =
|
|
case alexScan input 0 of
|
|
AlexEOF -> let AlexInput pos _ = input
|
|
loc = Location src pos pos
|
|
in [EOFTok `locatedAt` loc]
|
|
AlexError input' -> let AlexInput pos text = input'
|
|
(as, bs) = T.break isSpace text
|
|
pos' = advanceWith' pos as
|
|
input'' = AlexInput pos' bs
|
|
loc = Location src pos pos'
|
|
in (ErrorTok as `locatedAt` loc) : go input''
|
|
AlexSkip input' _ -> go input'
|
|
AlexToken input' len act -> act src len input : go input'
|
|
|
|
data AlexInput = AlexInput !Position Text
|
|
|
|
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
|
|
alexGetByte (AlexInput p t) =
|
|
do (c, rest) <- T.uncons t
|
|
return (byteForChar c, (AlexInput (p `advanceWith` c) rest))
|
|
where
|
|
byteForChar c | isAscii c = fromIntegral (ord c)
|
|
| otherwise = 0
|
|
|
|
type AlexAction = Origin -> Int -> AlexInput -> Located Token
|
|
|
|
emitT :: Text -> AlexAction
|
|
emitT str = emitS (const (Special str))
|
|
|
|
emitS :: (Text -> Token) -> AlexAction
|
|
emitS mk src len (AlexInput pos t) = token `locatedAt` loc
|
|
where
|
|
txt = T.take (fromIntegral len) t
|
|
token = mk txt
|
|
loc = Location src pos (pos `advanceWith'` txt)
|
|
|
|
emitI :: Int64 -> (Text -> Token) -> AlexAction
|
|
emitI dropCount mk src len (AlexInput pos t) = token `locatedAt` loc
|
|
where
|
|
baseText = T.take (fromIntegral len) t
|
|
txt = T.drop dropCount baseText
|
|
token = mk txt
|
|
loc = Location src pos (pos `advanceWith'` baseText)
|
|
|
|
}
|