Lexer integration.

This commit is contained in:
2016-04-30 21:52:37 -07:00
parent edc2a7161a
commit a9dc45d93d
9 changed files with 271 additions and 199 deletions

64
src/Bang/CommandLine.hs Normal file
View File

@@ -0,0 +1,64 @@
module Bang.CommandLine(
BangCommand(..)
, BangOperation(..)
, LexerOptions(..)
, getCommand
, helpString
)
where
import Options.Applicative
import Options.Applicative.Help
data BangCommand = BangCommand {
cmdVerbosity :: Verbosity
, cmdOutputFile :: FilePath
, cmdCommand :: BangOperation
}
deriving (Show)
data Verbosity = Silent | Normal | Verbose
deriving (Eq, Show)
verboseOption :: Parser Verbosity
verboseOption = flag Normal Silent (short 'q' <> long "quiet")
<|> flag Normal Verbose (short 'v' <> long "verbose")
outputFile :: Parser FilePath
outputFile = strOption (short 'o' <> long "output-file" <> metavar "FILE"
<> help "The file to output as a result of this action."
<> value "/dev/stdout" <> showDefault)
data BangOperation = Help
| Version
| Lex LexerOptions
deriving (Show)
bangOperation :: Parser BangOperation
bangOperation = subparser $
command "help" (pure Help `withInfo` "Describe common commands.") <>
command "version" (pure Version `withInfo` "Display version information.") <>
command "lex" (parseLex `withInfo` "Lex a file into its component tokens.")
withInfo :: Parser a -> String -> ParserInfo a
withInfo opts desc = info (helper <*> opts) (progDesc desc)
data LexerOptions = LexerOptions {
lexInputFile :: FilePath
}
deriving (Show)
parseLex :: Parser BangOperation
parseLex = Lex <$> parseLexOptions
parseLexOptions :: Parser LexerOptions
parseLexOptions = LexerOptions <$> argument str (metavar "FILE")
parseOptions :: Parser BangCommand
parseOptions = BangCommand <$> verboseOption <*> outputFile <*> bangOperation
helpString :: String
helpString = show (parserHelp (ParserPrefs "" False False True 80) parseOptions)
getCommand :: IO BangCommand
getCommand = execParser (parseOptions `withInfo` "Run a bang language action.")

107
src/Bang/Syntax/Lexer.x Normal file
View File

@@ -0,0 +1,107 @@
{
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS -w #-}
module Bang.Syntax.Lexer(lexer)
where
import Bang.Syntax.Location
import Bang.Syntax.Name
import Bang.Syntax.Token
import Data.Char(isSpace, isAscii, ord)
import Data.Maybe(fromMaybe)
import Data.Text.Lazy(Text)
import qualified Data.Text.Lazy as T
import Data.Word(Word8)
}
-- Digits
$decdigit = 0-9
$hexdigit = [0-9a-fA-f]
$octdigit = 0-7
$bindigit = [01]
-- Identifier Characters
$typestart = [A-Z\_]
$valstart = [a-z\_]
$identrest = [a-zA-Z0-9\_\.]
$opident = [\~\!\@\#\$\%\^\&\*\+\-\=\.\<\>\?\_\|:]
$escape_char = [abfnrtv'\"\\]
:-
-- Whitespace
$white+ ;
"/*".*"*/" ;
-- Numbers
$decdigit+ { emitS (IntTok 10) }
"0x"$hexdigit+ { emitS (IntTok 16) }
"0o"$octdigit+ { emitS (IntTok 8) }
"0b"$bindigit+ { emitS (IntTok 2) }
$decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS FloatTok}
$decdigit+"e""-"?$decdigit+ { emitS FloatTok}
-- Identifier
$typestart $identrest* { emitS TypeIdent }
$valstart $identrest* { emitS ValIdent }
$opident+ { emitS OpIdent }
-- Characters and Strings
['].['] { emitS CharTok }
['] [\\] $escape_char ['] { emitS CharTok }
[\"] ([^\"] | [\n] | ([\\] $escape_char))* [\"] { emitS StringTok }
-- Symbols
"(" { emitT "(" }
")" { emitT ")" }
"[" { emitT "[" }
"]" { emitT "]" }
"{" { emitT "{" }
"}" { emitT "}" }
";" { emitT ";" }
"," { emitT "," }
"`" { emitT "`" }
[\\] { emitT "\\" }
{
lexer :: Origin -> Maybe Position -> Text -> [Located Token]
lexer src mbPos txt = go (AlexInput startPos txt)
where
startPos = fromMaybe initialPosition mbPos
go input =
case alexScan input 0 of
AlexEOF -> []
AlexError input' -> let AlexInput pos text = input'
(as, bs) = T.break isSpace text
pos' = advanceWith' pos as
input'' = AlexInput pos' bs
loc = Location src pos pos'
in (ErrorTok as `locatedAt` loc) : go input''
AlexSkip input' _ -> go input'
AlexToken input' len act -> act src len input : go input'
data AlexInput = AlexInput !Position Text
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (AlexInput p t) =
do (c, rest) <- T.uncons t
return (byteForChar c, (AlexInput (p `advanceWith` c) rest))
where
byteForChar c | isAscii c = fromIntegral (ord c)
| otherwise = 0
type AlexAction = Origin -> Int -> AlexInput -> Located Token
emitT :: Text -> AlexAction
emitT str = emitS (const (Special str))
emitS :: (Text -> Token) -> AlexAction
emitS mk src len (AlexInput pos t) = token `locatedAt` loc
where
txt = T.take (fromIntegral len) t
token = mk txt
loc = Location src pos (pos `advanceWith'` txt)
}

View File

@@ -0,0 +1,68 @@
{-# LANGUAGE TemplateHaskell #-}
module Bang.Syntax.Location(
Position, posRow, posColumn, posOffset
, initialPosition
, advanceWith, advanceWith'
, showPosition
, Origin(..)
, Location(Location)
, locSource, locStart, locEnd
, Located(..)
, locatedAt
)
where
import Control.Lens.TH(makeLenses)
import Data.Text.Lazy(Text)
import qualified Data.Text.Lazy as T
data Position = Position {
_posRow :: Word
, _posColumn :: Word
, _posOffset :: Word
}
deriving (Show)
makeLenses ''Position
initialPosition :: Position
initialPosition = Position 1 1 0
instance Eq Position where
a == b = _posOffset a == _posOffset b
advanceWith :: Position -> Char -> Position
advanceWith (Position r c o) '\t' = Position r (c+8) (o+1)
advanceWith (Position r _ o) '\n' = Position (r+1) 1 (o+1)
advanceWith (Position r c o) _ = Position r (c+1) (o+1)
advanceWith' :: Position -> Text -> Position
advanceWith' pos txt =
case T.uncons txt of
Nothing -> pos
Just (c, rest) -> advanceWith' (pos `advanceWith` c) rest
showPosition :: Position -> String
showPosition (Position r c _) = show r ++ ":" ++ show c
data Origin = Unknown
| Interactive
| File FilePath
deriving (Eq, Show)
data Location = Location {
_locSource :: Origin
, _locStart :: Position
, _locEnd :: Position
}
deriving (Eq, Show)
makeLenses ''Location
data Located a = Located !Location a
instance Show a => Show (Located a) where
show (Located l x) = show x ++ " `locatedAt` " ++ show l
locatedAt :: a -> Location -> Located a
locatedAt a p = Located p a

32
src/Bang/Syntax/Name.hs Normal file
View File

@@ -0,0 +1,32 @@
{-# LANGUAGE TemplateHaskell #-}
module Bang.Syntax.Name(
Name
, nameId
, nameString
, nameGenerated
)
where
import Control.Lens.TH(makeLenses)
data Name = Name {
_nameId :: !Word
, _nameString :: !String
, _nameGenerated :: !Bool
}
makeLenses ''Name
instance Eq Name where
a == b = _nameId a == _nameId b
a /= b = _nameId a /= _nameId b
instance Ord Name where
compare a b = compare (_nameId a) (_nameId b)
max a b = if a > b then a else b
min a b = if a > b then b else a
a < b = _nameId a < _nameId b
a <= b = _nameId a <= _nameId b
a > b = _nameId a > _nameId b
a >= b = _nameId a >= _nameId b

18
src/Bang/Syntax/Token.hs Normal file
View File

@@ -0,0 +1,18 @@
module Bang.Syntax.Token(
Token(..)
)
where
import Data.Text.Lazy(Text)
data Token = CharTok Text
| FloatTok Text
| IntTok Word Text
| OpIdent Text
| Special Text
| StringTok Text
| TypeIdent Text
| ValIdent Text
| ErrorTok Text
| EOFTok
deriving (Show)