Lexer integration.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,7 +5,6 @@
|
||||
*.bak
|
||||
hsrc/Syntax/Lexer.hs
|
||||
hsrc/Syntax/Parser.hs
|
||||
bang
|
||||
|
||||
.cabal-sandbox/
|
||||
dist/
|
||||
|
||||
27
bang.cabal
27
bang.cabal
@@ -14,14 +14,31 @@ cabal-version: >=1.10
|
||||
|
||||
executable bang
|
||||
main-is: Main.hs
|
||||
build-depends: base >= 4.8 && < 4.9,
|
||||
build-depends:
|
||||
array >= 0.5.1.0 && < 0.7,
|
||||
base >= 4.8 && < 4.9,
|
||||
bytestring >= 0.10 && < 0.11,
|
||||
optparse-applicative >= 0.12.1 && < 0.14
|
||||
lens >= 4.14 && < 4.18,
|
||||
monadLib >= 3.7.3 && < 3.9,
|
||||
optparse-applicative >= 0.12.1 && < 0.14,
|
||||
text >= 1.2.2.1 && < 1.4
|
||||
hs-source-dirs: src
|
||||
build-tools: alex, happy
|
||||
ghc-options: -Wall
|
||||
default-language: Haskell2010
|
||||
other-extensions: DeriveDataTypeable
|
||||
other-extensions: CPP,
|
||||
DeriveDataTypeable,
|
||||
DeriveFunctor,
|
||||
GeneralizedNewtypeDeriving,
|
||||
MagicHash,
|
||||
MultiParamTypeClasses,
|
||||
OverloadedStrings,
|
||||
TemplateHaskell
|
||||
other-modules:
|
||||
Paths_bang,
|
||||
Syntax.CommandLine
|
||||
Bang.CommandLine,
|
||||
Bang.Syntax.Lexer,
|
||||
Bang.Syntax.Location,
|
||||
Bang.Syntax.Name,
|
||||
Bang.Syntax.Token,
|
||||
Paths_bang
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
module Syntax.CommandLine(
|
||||
module Bang.CommandLine(
|
||||
BangCommand(..)
|
||||
, BangOperation(..)
|
||||
, LexerOptions(..)
|
||||
@@ -7,8 +7,6 @@ module Syntax.CommandLine(
|
||||
)
|
||||
where
|
||||
|
||||
import Data.Version(showVersion)
|
||||
import Paths_bang(version)
|
||||
import Options.Applicative
|
||||
import Options.Applicative.Help
|
||||
|
||||
@@ -32,12 +30,14 @@ outputFile = strOption (short 'o' <> long "output-file" <> metavar "FILE"
|
||||
<> value "/dev/stdout" <> showDefault)
|
||||
|
||||
data BangOperation = Help
|
||||
| Version
|
||||
| Lex LexerOptions
|
||||
deriving (Show)
|
||||
|
||||
bangOperation :: Parser BangOperation
|
||||
bangOperation = subparser $
|
||||
command "help" (pure Help `withInfo` "Describe common commands.") <>
|
||||
command "version" (pure Version `withInfo` "Display version information.") <>
|
||||
command "lex" (parseLex `withInfo` "Lex a file into its component tokens.")
|
||||
|
||||
withInfo :: Parser a -> String -> ParserInfo a
|
||||
107
src/Bang/Syntax/Lexer.x
Normal file
107
src/Bang/Syntax/Lexer.x
Normal file
@@ -0,0 +1,107 @@
|
||||
{
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# OPTIONS -w #-}
|
||||
module Bang.Syntax.Lexer(lexer)
|
||||
where
|
||||
|
||||
import Bang.Syntax.Location
|
||||
import Bang.Syntax.Name
|
||||
import Bang.Syntax.Token
|
||||
import Data.Char(isSpace, isAscii, ord)
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Data.Text.Lazy(Text)
|
||||
import qualified Data.Text.Lazy as T
|
||||
import Data.Word(Word8)
|
||||
|
||||
}
|
||||
|
||||
-- Digits
|
||||
$decdigit = 0-9
|
||||
$hexdigit = [0-9a-fA-F]
|
||||
$octdigit = 0-7
|
||||
$bindigit = [01]
|
||||
|
||||
-- Identifier Characters
|
||||
$typestart = [A-Z\_]
|
||||
$valstart = [a-z\_]
|
||||
$identrest = [a-zA-Z0-9\_\.]
|
||||
$opident = [\~\!\@\#\$\%\^\&\*\+\-\=\.\<\>\?\_\|:]
|
||||
$escape_char = [abfnrtv'\"\\]
|
||||
|
||||
:-
|
||||
|
||||
-- Whitespace
|
||||
$white+ ;
|
||||
"/*".*"*/" ;
|
||||
|
||||
-- Numbers
|
||||
$decdigit+ { emitS (IntTok 10) }
|
||||
"0x"$hexdigit+ { emitS (IntTok 16) }
|
||||
"0o"$octdigit+ { emitS (IntTok 8) }
|
||||
"0b"$bindigit+ { emitS (IntTok 2) }
|
||||
$decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS FloatTok}
|
||||
$decdigit+"e""-"?$decdigit+ { emitS FloatTok}
|
||||
|
||||
-- Identifier
|
||||
$typestart $identrest* { emitS TypeIdent }
|
||||
$valstart $identrest* { emitS ValIdent }
|
||||
$opident+ { emitS OpIdent }
|
||||
|
||||
-- Characters and Strings
|
||||
['].['] { emitS CharTok }
|
||||
['] [\\] $escape_char ['] { emitS CharTok }
|
||||
[\"] ([^\"] | [\n] | ([\\] $escape_char))* [\"] { emitS StringTok }
|
||||
|
||||
-- Symbols
|
||||
"(" { emitT "(" }
|
||||
")" { emitT ")" }
|
||||
"[" { emitT "[" }
|
||||
"]" { emitT "]" }
|
||||
"{" { emitT "{" }
|
||||
"}" { emitT "}" }
|
||||
";" { emitT ";" }
|
||||
"," { emitT "," }
|
||||
"`" { emitT "`" }
|
||||
[\\] { emitT "\\" }
|
||||
|
||||
{
|
||||
|
||||
lexer :: Origin -> Maybe Position -> Text -> [Located Token]
|
||||
lexer src mbPos txt = go (AlexInput startPos txt)
|
||||
where
|
||||
startPos = fromMaybe initialPosition mbPos
|
||||
go input =
|
||||
case alexScan input 0 of
|
||||
AlexEOF -> []
|
||||
AlexError input' -> let AlexInput pos text = input'
|
||||
(as, bs) = T.break isSpace text
|
||||
pos' = advanceWith' pos as
|
||||
input'' = AlexInput pos' bs
|
||||
loc = Location src pos pos'
|
||||
in (ErrorTok as `locatedAt` loc) : go input''
|
||||
AlexSkip input' _ -> go input'
|
||||
AlexToken input' len act -> act src len input : go input'
|
||||
|
||||
data AlexInput = AlexInput !Position Text
|
||||
|
||||
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
|
||||
alexGetByte (AlexInput p t) =
|
||||
do (c, rest) <- T.uncons t
|
||||
return (byteForChar c, (AlexInput (p `advanceWith` c) rest))
|
||||
where
|
||||
byteForChar c | isAscii c = fromIntegral (ord c)
|
||||
| otherwise = 0
|
||||
|
||||
type AlexAction = Origin -> Int -> AlexInput -> Located Token
|
||||
|
||||
emitT :: Text -> AlexAction
|
||||
emitT str = emitS (const (Special str))
|
||||
|
||||
emitS :: (Text -> Token) -> AlexAction
|
||||
emitS mk src len (AlexInput pos t) = token `locatedAt` loc
|
||||
where
|
||||
txt = T.take (fromIntegral len) t
|
||||
token = mk txt
|
||||
loc = Location src pos (pos `advanceWith'` txt)
|
||||
|
||||
}
|
||||
68
src/Bang/Syntax/Location.hs
Normal file
68
src/Bang/Syntax/Location.hs
Normal file
@@ -0,0 +1,68 @@
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
module Bang.Syntax.Location(
|
||||
Position, posRow, posColumn, posOffset
|
||||
, initialPosition
|
||||
, advanceWith, advanceWith'
|
||||
, showPosition
|
||||
, Origin(..)
|
||||
, Location(Location)
|
||||
, locSource, locStart, locEnd
|
||||
, Located(..)
|
||||
, locatedAt
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Lens.TH(makeLenses)
|
||||
import Data.Text.Lazy(Text)
|
||||
import qualified Data.Text.Lazy as T
|
||||
|
||||
data Position = Position {
|
||||
_posRow :: Word
|
||||
, _posColumn :: Word
|
||||
, _posOffset :: Word
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
makeLenses ''Position
|
||||
|
||||
initialPosition :: Position
|
||||
initialPosition = Position 1 1 0
|
||||
|
||||
instance Eq Position where
|
||||
a == b = _posOffset a == _posOffset b
|
||||
|
||||
advanceWith :: Position -> Char -> Position
|
||||
advanceWith (Position r c o) '\t' = Position r (c+8) (o+1)
|
||||
advanceWith (Position r _ o) '\n' = Position (r+1) 1 (o+1)
|
||||
advanceWith (Position r c o) _ = Position r (c+1) (o+1)
|
||||
|
||||
advanceWith' :: Position -> Text -> Position
|
||||
advanceWith' pos txt =
|
||||
case T.uncons txt of
|
||||
Nothing -> pos
|
||||
Just (c, rest) -> advanceWith' (pos `advanceWith` c) rest
|
||||
|
||||
showPosition :: Position -> String
|
||||
showPosition (Position r c _) = show r ++ ":" ++ show c
|
||||
|
||||
data Origin = Unknown
|
||||
| Interactive
|
||||
| File FilePath
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Location = Location {
|
||||
_locSource :: Origin
|
||||
, _locStart :: Position
|
||||
, _locEnd :: Position
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
makeLenses ''Location
|
||||
|
||||
data Located a = Located !Location a
|
||||
|
||||
instance Show a => Show (Located a) where
|
||||
show (Located l x) = show x ++ " `locatedAt` " ++ show l
|
||||
|
||||
locatedAt :: a -> Location -> Located a
|
||||
locatedAt a p = Located p a
|
||||
32
src/Bang/Syntax/Name.hs
Normal file
32
src/Bang/Syntax/Name.hs
Normal file
@@ -0,0 +1,32 @@
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
module Bang.Syntax.Name(
|
||||
Name
|
||||
, nameId
|
||||
, nameString
|
||||
, nameGenerated
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Lens.TH(makeLenses)
|
||||
|
||||
data Name = Name {
|
||||
_nameId :: !Word
|
||||
, _nameString :: !String
|
||||
, _nameGenerated :: !Bool
|
||||
}
|
||||
|
||||
makeLenses ''Name
|
||||
|
||||
instance Eq Name where
|
||||
a == b = _nameId a == _nameId b
|
||||
a /= b = _nameId a /= _nameId b
|
||||
|
||||
instance Ord Name where
|
||||
compare a b = compare (_nameId a) (_nameId b)
|
||||
max a b = if a > b then a else b
|
||||
min a b = if a > b then b else a
|
||||
a < b = _nameId a < _nameId b
|
||||
a <= b = _nameId a <= _nameId b
|
||||
a > b = _nameId a > _nameId b
|
||||
a >= b = _nameId a >= _nameId b
|
||||
|
||||
18
src/Bang/Syntax/Token.hs
Normal file
18
src/Bang/Syntax/Token.hs
Normal file
@@ -0,0 +1,18 @@
|
||||
module Bang.Syntax.Token(
|
||||
Token(..)
|
||||
)
|
||||
where
|
||||
|
||||
import Data.Text.Lazy(Text)
|
||||
|
||||
data Token = CharTok Text
|
||||
| FloatTok Text
|
||||
| IntTok Word Text
|
||||
| OpIdent Text
|
||||
| Special Text
|
||||
| StringTok Text
|
||||
| TypeIdent Text
|
||||
| ValIdent Text
|
||||
| ErrorTok Text
|
||||
| EOFTok
|
||||
deriving (Show)
|
||||
76
src/Main.hs
76
src/Main.hs
@@ -1,66 +1,26 @@
|
||||
import Syntax.CommandLine
|
||||
import Bang.CommandLine
|
||||
import Bang.Syntax.Lexer
|
||||
import Bang.Syntax.Location
|
||||
import Control.Exception(tryJust)
|
||||
import Control.Monad(guard)
|
||||
import qualified Data.Text.Lazy.IO as T
|
||||
import Data.Version(showVersion)
|
||||
import Paths_bang(version)
|
||||
import System.IO.Error(isDoesNotExistError)
|
||||
|
||||
main :: IO ()
|
||||
main = getCommand >>= \ cmd ->
|
||||
case cmdCommand cmd of
|
||||
Lex _lexOpts -> putStrLn ("LEX: " ++ show cmd)
|
||||
Lex o -> runLexer cmd o
|
||||
Help -> putStrLn helpString
|
||||
Version -> putStrLn ("Bang tool, version " ++ showVersion version)
|
||||
|
||||
|
||||
{-
|
||||
import Control.Exception
|
||||
import Control.Monad
|
||||
import qualified Data.ByteString as S
|
||||
import System.Environment
|
||||
import System.Exit
|
||||
import System.IO.Error
|
||||
|
||||
import Syntax.AST
|
||||
import Syntax.Lexer
|
||||
import Syntax.Parser
|
||||
import Syntax.ParserCore
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
args <- getArgs
|
||||
case args of
|
||||
[file] -> do
|
||||
ast <- loadModule file
|
||||
putStrLn "Successful parse!"
|
||||
putStrLn (show ast)
|
||||
["-lex",path] -> do
|
||||
mtxt <- tryJust (guard . isDoesNotExistError) $ S.readFile path
|
||||
runLexer :: BangCommand -> LexerOptions -> IO ()
|
||||
runLexer _cmd opts =
|
||||
do let path = lexInputFile opts
|
||||
mtxt <- tryJust (guard . isDoesNotExistError) (T.readFile path)
|
||||
case mtxt of
|
||||
Left _ -> fail $ "Unable to open file: " ++ path
|
||||
Right txt -> do
|
||||
case runParser path txt pullTokens of
|
||||
Left err -> printError err >> exitWith (ExitFailure 1)
|
||||
Right ress -> do
|
||||
mapM_ putStrLn ress
|
||||
putStrLn "Successful lex."
|
||||
["-parse",path] -> do
|
||||
ast <- loadModule path
|
||||
putStrLn "Successful parse!"
|
||||
putStrLn (show ast)
|
||||
_ -> fail "Unacceptable arguments."
|
||||
|
||||
pullTokens :: Parser [String]
|
||||
pullTokens = do
|
||||
tok <- scan
|
||||
case tok of
|
||||
Lexeme pos tok' -> do
|
||||
let res = show pos ++ " " ++ show tok'
|
||||
if tok' == TokEOF
|
||||
then return [res]
|
||||
else return (res :) `ap` pullTokens
|
||||
|
||||
loadModule :: FilePath -> IO (Module Position)
|
||||
loadModule path = do
|
||||
mtxt <- tryJust (guard . isDoesNotExistError) $ S.readFile path
|
||||
case mtxt of
|
||||
Left _ -> fail $ "Unable to open file: " ++ path
|
||||
Left _ -> fail ("Unable to open file: " ++ path)
|
||||
Right txt ->
|
||||
case runParser path txt parseModule of
|
||||
Left err -> printError err >> exitWith (ExitFailure 1)
|
||||
Right ast -> return ast
|
||||
-}
|
||||
do let tokens = lexer (File path) (Just initialPosition) txt
|
||||
mapM_ (putStrLn . show) tokens
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
{
|
||||
{-# LANGUAGE DeriveDataTypeable #-}
|
||||
{-# OPTIONS -w #-}
|
||||
module Syntax.Lexer where
|
||||
|
||||
import qualified Codec.Binary.UTF8.Generic as UTF8
|
||||
import qualified Data.ByteString as S
|
||||
import MonadLib
|
||||
|
||||
import Syntax.ParserCore
|
||||
|
||||
}
|
||||
|
||||
-- Digits
|
||||
$decdigit = 0-9
|
||||
$hexdigit = [0-9a-fA-f]
|
||||
$octdigit = 0-7
|
||||
$bindigit = [01]
|
||||
|
||||
-- Identifier Characters
|
||||
$typestart = [A-Z\_]
|
||||
$valstart = [a-z\_]
|
||||
$identrest = [a-zA-Z0-9\_\.]
|
||||
$opident = [\~\!\@\#\$\%\^\&\*\+\-\=\.\<\>\?\_\|]
|
||||
$escape_char = [abfnrtv'\"\\]
|
||||
|
||||
:-
|
||||
|
||||
-- Whitespace
|
||||
$white+ ;
|
||||
"/*".*"*/" ;
|
||||
|
||||
-- Numbers
|
||||
$decdigit+ { emitS (buildInt 10) }
|
||||
"0x"$hexdigit+ { emitS (buildInt 16) }
|
||||
"0o"$octdigit+ { emitS (buildInt 8) }
|
||||
"0b"$bindigit+ { emitS (buildInt 2) }
|
||||
$decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS TokFloat}
|
||||
$decdigit+"e""-"?$decdigit+ { emitS TokFloat}
|
||||
|
||||
-- Identifier
|
||||
$typestart $identrest* { emitS TokTypeIdent }
|
||||
"prim%" $typestart $identrest* { emitS TokTypeIdent }
|
||||
$valstart $identrest* { emitS TokValIdent }
|
||||
"prim%" $valstart $identrest* { emitS TokValIdent }
|
||||
$opident+ { emitS TokOpIdent }
|
||||
":"+ { emitS TokOpIdent }
|
||||
|
||||
-- Characters and Strings
|
||||
['].['] { emitS TokChar }
|
||||
['] [\\] $escape_char ['] { emitS TokChar }
|
||||
[\"] ([^\"] | [\n] | ([\\] $escape_char))* [\"] { emitS TokString }
|
||||
|
||||
-- Symbols
|
||||
"(" { emitT LParen }
|
||||
")" { emitT RParen }
|
||||
"[" { emitT LSquare }
|
||||
"]" { emitT RSquare }
|
||||
"{" { emitT LBrace }
|
||||
"}" { emitT RBrace }
|
||||
";" { emitT Semi }
|
||||
"," { emitT Comma }
|
||||
"`" { emitT BTick }
|
||||
[\\] { emitT LLambda }
|
||||
|
||||
{
|
||||
|
||||
type AlexInput = (Position,Char,S.ByteString)
|
||||
|
||||
emitT :: Token -> AlexInput -> Int -> Parser Lexeme
|
||||
emitT tok (pos,_,_) _ = return $! Lexeme pos tok
|
||||
|
||||
emitS :: (String -> Token) -> AlexInput -> Int -> Parser Lexeme
|
||||
emitS mk (pos,c,bs) len = return $! Lexeme pos (mk input)
|
||||
where input = UTF8.toString (S.take len bs)
|
||||
|
||||
scan :: Parser Lexeme
|
||||
scan = do
|
||||
inp@(pos,_,_) <- alexGetInput
|
||||
sc <- alexGetStartCode
|
||||
case alexScan inp sc of
|
||||
AlexEOF -> return $! Lexeme pos TokEOF
|
||||
AlexError inp' -> do
|
||||
let posStr = pprtPosition pos
|
||||
alexError $ posStr ++ ": Lexical error."
|
||||
AlexSkip inp' len' -> alexSetInput inp' >> scan
|
||||
AlexToken inp' len action -> do
|
||||
alexSetInput inp'
|
||||
action inp len
|
||||
|
||||
alexGetInput :: Parser AlexInput
|
||||
alexGetInput = do
|
||||
s <- get
|
||||
return (psPos s, psChar s, psInput s)
|
||||
|
||||
alexSetInput :: AlexInput -> Parser ()
|
||||
alexSetInput (pos,c,bs) = do
|
||||
s <- get
|
||||
set $! s {
|
||||
psPos = pos
|
||||
, psChar = c
|
||||
, psInput = bs
|
||||
}
|
||||
|
||||
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
|
||||
alexGetChar (p,_,bs) = do
|
||||
(c,bs') <- UTF8.uncons bs
|
||||
return (c, (movePos p c, c, bs'))
|
||||
|
||||
alexInputPrevChar :: AlexInput -> Char
|
||||
alexInputPrevChar (_,c,_) = c
|
||||
|
||||
alexError :: String -> Parser a
|
||||
alexError = raiseL
|
||||
|
||||
alexGetStartCode :: Parser Int
|
||||
alexGetStartCode = psLexCode `fmap` get
|
||||
|
||||
alexSetStartCode :: Int -> Parser ()
|
||||
alexSetStartCode code = do
|
||||
s <- get
|
||||
set $! s { psLexCode = code }
|
||||
|
||||
begin code _ _ = alexSetStartCode code >> scan
|
||||
|
||||
buildInt :: Int -> String -> Token
|
||||
buildInt base val = TokInt (base, val)
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user