Lexer integration.

This commit is contained in:
2016-04-30 21:52:37 -07:00
parent edc2a7161a
commit a9dc45d93d
9 changed files with 271 additions and 199 deletions

107
src/Bang/Syntax/Lexer.x Normal file
View File

@@ -0,0 +1,107 @@
{
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS -w #-}
module Bang.Syntax.Lexer(lexer)
where
import Bang.Syntax.Location
import Bang.Syntax.Name
import Bang.Syntax.Token
import Data.Char(isSpace, isAscii, ord)
import Data.Maybe(fromMaybe)
import Data.Text.Lazy(Text)
import qualified Data.Text.Lazy as T
import Data.Word(Word8)
}
-- Digits
$decdigit = 0-9
$hexdigit = [0-9a-fA-f]
$octdigit = 0-7
$bindigit = [01]
-- Identifier Characters
$typestart = [A-Z\_]
$valstart = [a-z\_]
$identrest = [a-zA-Z0-9\_\.]
$opident = [\~\!\@\#\$\%\^\&\*\+\-\=\.\<\>\?\_\|:]
$escape_char = [abfnrtv'\"\\]
:-
-- Whitespace
$white+ ;
"/*".*"*/" ;
-- Numbers
$decdigit+ { emitS (IntTok 10) }
"0x"$hexdigit+ { emitS (IntTok 16) }
"0o"$octdigit+ { emitS (IntTok 8) }
"0b"$bindigit+ { emitS (IntTok 2) }
$decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS FloatTok}
$decdigit+"e""-"?$decdigit+ { emitS FloatTok}
-- Identifier
$typestart $identrest* { emitS TypeIdent }
$valstart $identrest* { emitS ValIdent }
$opident+ { emitS OpIdent }
-- Characters and Strings
['].['] { emitS CharTok }
['] [\\] $escape_char ['] { emitS CharTok }
[\"] ([^\"] | [\n] | ([\\] $escape_char))* [\"] { emitS StringTok }
-- Symbols
"(" { emitT "(" }
")" { emitT ")" }
"[" { emitT "[" }
"]" { emitT "]" }
"{" { emitT "{" }
"}" { emitT "}" }
";" { emitT ";" }
"," { emitT "," }
"`" { emitT "`" }
[\\] { emitT "\\" }
{
lexer :: Origin -> Maybe Position -> Text -> [Located Token]
lexer src mbPos txt = go (AlexInput startPos txt)
where
startPos = fromMaybe initialPosition mbPos
go input =
case alexScan input 0 of
AlexEOF -> []
AlexError input' -> let AlexInput pos text = input'
(as, bs) = T.break isSpace text
pos' = advanceWith' pos as
input'' = AlexInput pos' bs
loc = Location src pos pos'
in (ErrorTok as `locatedAt` loc) : go input''
AlexSkip input' _ -> go input'
AlexToken input' len act -> act src len input : go input'
data AlexInput = AlexInput !Position Text
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (AlexInput p t) =
do (c, rest) <- T.uncons t
return (byteForChar c, (AlexInput (p `advanceWith` c) rest))
where
byteForChar c | isAscii c = fromIntegral (ord c)
| otherwise = 0
type AlexAction = Origin -> Int -> AlexInput -> Located Token
emitT :: Text -> AlexAction
emitT str = emitS (const (Special str))
emitS :: (Text -> Token) -> AlexAction
emitS mk src len (AlexInput pos t) = token `locatedAt` loc
where
txt = T.take (fromIntegral len) t
token = mk txt
loc = Location src pos (pos `advanceWith'` txt)
}