Lexer integration.
This commit is contained in:
64
src/Bang/CommandLine.hs
Normal file
64
src/Bang/CommandLine.hs
Normal file
@@ -0,0 +1,64 @@
|
||||
module Bang.CommandLine(
|
||||
BangCommand(..)
|
||||
, BangOperation(..)
|
||||
, LexerOptions(..)
|
||||
, getCommand
|
||||
, helpString
|
||||
)
|
||||
where
|
||||
|
||||
import Options.Applicative
|
||||
import Options.Applicative.Help
|
||||
|
||||
data BangCommand = BangCommand {
|
||||
cmdVerbosity :: Verbosity
|
||||
, cmdOutputFile :: FilePath
|
||||
, cmdCommand :: BangOperation
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
data Verbosity = Silent | Normal | Verbose
|
||||
deriving (Eq, Show)
|
||||
|
||||
verboseOption :: Parser Verbosity
|
||||
verboseOption = flag Normal Silent (short 'q' <> long "quiet")
|
||||
<|> flag Normal Verbose (short 'v' <> long "verbose")
|
||||
|
||||
outputFile :: Parser FilePath
|
||||
outputFile = strOption (short 'o' <> long "output-file" <> metavar "FILE"
|
||||
<> help "The file to output as a result of this action."
|
||||
<> value "/dev/stdout" <> showDefault)
|
||||
|
||||
data BangOperation = Help
|
||||
| Version
|
||||
| Lex LexerOptions
|
||||
deriving (Show)
|
||||
|
||||
bangOperation :: Parser BangOperation
|
||||
bangOperation = subparser $
|
||||
command "help" (pure Help `withInfo` "Describe common commands.") <>
|
||||
command "version" (pure Version `withInfo` "Display version information.") <>
|
||||
command "lex" (parseLex `withInfo` "Lex a file into its component tokens.")
|
||||
|
||||
withInfo :: Parser a -> String -> ParserInfo a
|
||||
withInfo opts desc = info (helper <*> opts) (progDesc desc)
|
||||
|
||||
data LexerOptions = LexerOptions {
|
||||
lexInputFile :: FilePath
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
parseLex :: Parser BangOperation
|
||||
parseLex = Lex <$> parseLexOptions
|
||||
|
||||
parseLexOptions :: Parser LexerOptions
|
||||
parseLexOptions = LexerOptions <$> argument str (metavar "FILE")
|
||||
|
||||
parseOptions :: Parser BangCommand
|
||||
parseOptions = BangCommand <$> verboseOption <*> outputFile <*> bangOperation
|
||||
|
||||
helpString :: String
|
||||
helpString = show (parserHelp (ParserPrefs "" False False True 80) parseOptions)
|
||||
|
||||
getCommand :: IO BangCommand
|
||||
getCommand = execParser (parseOptions `withInfo` "Run a bang language action.")
|
||||
107
src/Bang/Syntax/Lexer.x
Normal file
107
src/Bang/Syntax/Lexer.x
Normal file
@@ -0,0 +1,107 @@
|
||||
{
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# OPTIONS -w #-}
|
||||
module Bang.Syntax.Lexer(lexer)
|
||||
where
|
||||
|
||||
import Bang.Syntax.Location
|
||||
import Bang.Syntax.Name
|
||||
import Bang.Syntax.Token
|
||||
import Data.Char(isSpace, isAscii, ord)
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Data.Text.Lazy(Text)
|
||||
import qualified Data.Text.Lazy as T
|
||||
import Data.Word(Word8)
|
||||
|
||||
}
|
||||
|
||||
-- Digits
|
||||
$decdigit = 0-9
|
||||
$hexdigit = [0-9a-fA-f]
|
||||
$octdigit = 0-7
|
||||
$bindigit = [01]
|
||||
|
||||
-- Identifier Characters
|
||||
$typestart = [A-Z\_]
|
||||
$valstart = [a-z\_]
|
||||
$identrest = [a-zA-Z0-9\_\.]
|
||||
$opident = [\~\!\@\#\$\%\^\&\*\+\-\=\.\<\>\?\_\|:]
|
||||
$escape_char = [abfnrtv'\"\\]
|
||||
|
||||
:-
|
||||
|
||||
-- Whitespace
|
||||
$white+ ;
|
||||
"/*".*"*/" ;
|
||||
|
||||
-- Numbers
|
||||
$decdigit+ { emitS (IntTok 10) }
|
||||
"0x"$hexdigit+ { emitS (IntTok 16) }
|
||||
"0o"$octdigit+ { emitS (IntTok 8) }
|
||||
"0b"$bindigit+ { emitS (IntTok 2) }
|
||||
$decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS FloatTok}
|
||||
$decdigit+"e""-"?$decdigit+ { emitS FloatTok}
|
||||
|
||||
-- Identifier
|
||||
$typestart $identrest* { emitS TypeIdent }
|
||||
$valstart $identrest* { emitS ValIdent }
|
||||
$opident+ { emitS OpIdent }
|
||||
|
||||
-- Characters and Strings
|
||||
['].['] { emitS CharTok }
|
||||
['] [\\] $escape_char ['] { emitS CharTok }
|
||||
[\"] ([^\"] | [\n] | ([\\] $escape_char))* [\"] { emitS StringTok }
|
||||
|
||||
-- Symbols
|
||||
"(" { emitT "(" }
|
||||
")" { emitT ")" }
|
||||
"[" { emitT "[" }
|
||||
"]" { emitT "]" }
|
||||
"{" { emitT "{" }
|
||||
"}" { emitT "}" }
|
||||
";" { emitT ";" }
|
||||
"," { emitT "," }
|
||||
"`" { emitT "`" }
|
||||
[\\] { emitT "\\" }
|
||||
|
||||
{
|
||||
|
||||
lexer :: Origin -> Maybe Position -> Text -> [Located Token]
|
||||
lexer src mbPos txt = go (AlexInput startPos txt)
|
||||
where
|
||||
startPos = fromMaybe initialPosition mbPos
|
||||
go input =
|
||||
case alexScan input 0 of
|
||||
AlexEOF -> []
|
||||
AlexError input' -> let AlexInput pos text = input'
|
||||
(as, bs) = T.break isSpace text
|
||||
pos' = advanceWith' pos as
|
||||
input'' = AlexInput pos' bs
|
||||
loc = Location src pos pos'
|
||||
in (ErrorTok as `locatedAt` loc) : go input''
|
||||
AlexSkip input' _ -> go input'
|
||||
AlexToken input' len act -> act src len input : go input'
|
||||
|
||||
data AlexInput = AlexInput !Position Text
|
||||
|
||||
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
|
||||
alexGetByte (AlexInput p t) =
|
||||
do (c, rest) <- T.uncons t
|
||||
return (byteForChar c, (AlexInput (p `advanceWith` c) rest))
|
||||
where
|
||||
byteForChar c | isAscii c = fromIntegral (ord c)
|
||||
| otherwise = 0
|
||||
|
||||
type AlexAction = Origin -> Int -> AlexInput -> Located Token
|
||||
|
||||
emitT :: Text -> AlexAction
|
||||
emitT str = emitS (const (Special str))
|
||||
|
||||
emitS :: (Text -> Token) -> AlexAction
|
||||
emitS mk src len (AlexInput pos t) = token `locatedAt` loc
|
||||
where
|
||||
txt = T.take (fromIntegral len) t
|
||||
token = mk txt
|
||||
loc = Location src pos (pos `advanceWith'` txt)
|
||||
|
||||
}
|
||||
68
src/Bang/Syntax/Location.hs
Normal file
68
src/Bang/Syntax/Location.hs
Normal file
@@ -0,0 +1,68 @@
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
module Bang.Syntax.Location(
|
||||
Position, posRow, posColumn, posOffset
|
||||
, initialPosition
|
||||
, advanceWith, advanceWith'
|
||||
, showPosition
|
||||
, Origin(..)
|
||||
, Location(Location)
|
||||
, locSource, locStart, locEnd
|
||||
, Located(..)
|
||||
, locatedAt
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Lens.TH(makeLenses)
|
||||
import Data.Text.Lazy(Text)
|
||||
import qualified Data.Text.Lazy as T
|
||||
|
||||
data Position = Position {
|
||||
_posRow :: Word
|
||||
, _posColumn :: Word
|
||||
, _posOffset :: Word
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
makeLenses ''Position
|
||||
|
||||
initialPosition :: Position
|
||||
initialPosition = Position 1 1 0
|
||||
|
||||
instance Eq Position where
|
||||
a == b = _posOffset a == _posOffset b
|
||||
|
||||
advanceWith :: Position -> Char -> Position
|
||||
advanceWith (Position r c o) '\t' = Position r (c+8) (o+1)
|
||||
advanceWith (Position r _ o) '\n' = Position (r+1) 1 (o+1)
|
||||
advanceWith (Position r c o) _ = Position r (c+1) (o+1)
|
||||
|
||||
advanceWith' :: Position -> Text -> Position
|
||||
advanceWith' pos txt =
|
||||
case T.uncons txt of
|
||||
Nothing -> pos
|
||||
Just (c, rest) -> advanceWith' (pos `advanceWith` c) rest
|
||||
|
||||
showPosition :: Position -> String
|
||||
showPosition (Position r c _) = show r ++ ":" ++ show c
|
||||
|
||||
data Origin = Unknown
|
||||
| Interactive
|
||||
| File FilePath
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Location = Location {
|
||||
_locSource :: Origin
|
||||
, _locStart :: Position
|
||||
, _locEnd :: Position
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
makeLenses ''Location
|
||||
|
||||
data Located a = Located !Location a
|
||||
|
||||
instance Show a => Show (Located a) where
|
||||
show (Located l x) = show x ++ " `locatedAt` " ++ show l
|
||||
|
||||
locatedAt :: a -> Location -> Located a
|
||||
locatedAt a p = Located p a
|
||||
32
src/Bang/Syntax/Name.hs
Normal file
32
src/Bang/Syntax/Name.hs
Normal file
@@ -0,0 +1,32 @@
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
module Bang.Syntax.Name(
|
||||
Name
|
||||
, nameId
|
||||
, nameString
|
||||
, nameGenerated
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Lens.TH(makeLenses)
|
||||
|
||||
data Name = Name {
|
||||
_nameId :: !Word
|
||||
, _nameString :: !String
|
||||
, _nameGenerated :: !Bool
|
||||
}
|
||||
|
||||
makeLenses ''Name
|
||||
|
||||
instance Eq Name where
|
||||
a == b = _nameId a == _nameId b
|
||||
a /= b = _nameId a /= _nameId b
|
||||
|
||||
instance Ord Name where
|
||||
compare a b = compare (_nameId a) (_nameId b)
|
||||
max a b = if a > b then a else b
|
||||
min a b = if a > b then b else a
|
||||
a < b = _nameId a < _nameId b
|
||||
a <= b = _nameId a <= _nameId b
|
||||
a > b = _nameId a > _nameId b
|
||||
a >= b = _nameId a >= _nameId b
|
||||
|
||||
18
src/Bang/Syntax/Token.hs
Normal file
18
src/Bang/Syntax/Token.hs
Normal file
@@ -0,0 +1,18 @@
|
||||
module Bang.Syntax.Token(
|
||||
Token(..)
|
||||
)
|
||||
where
|
||||
|
||||
import Data.Text.Lazy(Text)
|
||||
|
||||
data Token = CharTok Text
|
||||
| FloatTok Text
|
||||
| IntTok Word Text
|
||||
| OpIdent Text
|
||||
| Special Text
|
||||
| StringTok Text
|
||||
| TypeIdent Text
|
||||
| ValIdent Text
|
||||
| ErrorTok Text
|
||||
| EOFTok
|
||||
deriving (Show)
|
||||
Reference in New Issue
Block a user