From e5bb88aa4e08e96115bd9a580678156a809bb39b Mon Sep 17 00:00:00 2001 From: Adam Wick Date: Wed, 8 Jun 2016 16:12:43 -0400 Subject: [PATCH] Something parses! --- bang.cabal | 28 +- src/Bang/CommandLine.hs | 16 +- src/Bang/Syntax/AST.hs | 31 ++- src/Bang/Syntax/Lexer.x | 23 +- src/Bang/Syntax/Location.hs | 4 + src/Bang/Syntax/Parser.hs | 534 ------------------------------------ src/Bang/Syntax/Parser.y | 263 +++++++++++++----- src/Main.hs | 15 +- test.bang | 8 + 9 files changed, 294 insertions(+), 628 deletions(-) delete mode 100644 src/Bang/Syntax/Parser.hs create mode 100644 test.bang diff --git a/bang.cabal b/bang.cabal index 1b292a3..9732f75 100644 --- a/bang.cabal +++ b/bang.cabal @@ -9,20 +9,30 @@ maintainer: Adam Wick category: Development build-type: Simple -cabal-version: >=1.10 +cabal-version: >= 1.10 executable bang main-is: Main.hs build-depends: - array >= 0.5.1.0 && < 0.7, - base >= 4.8 && < 4.9, - bytestring >= 0.10 && < 0.11, - containers >= 0.5.6.2 && < 0.7, - lens >= 4.14 && < 4.18, - monadLib >= 3.7.3 && < 3.9, - optparse-applicative >= 0.12.1 && < 0.14, - text >= 1.2.2.1 && < 1.4 + array, + base, + bytestring, + containers, + lens, + monadLib, + optparse-applicative, + pretty, + text +-- array >= 0.5.1.0 && < 0.7, +-- base >= 4.8 && < 4.9, +-- bytestring >= 0.10 && < 0.11, +-- containers >= 0.5.6.2 && < 0.7, +-- lens >= 4.14 && < 4.18, +-- monadLib >= 3.7.3 && < 3.9, +-- optparse-applicative >= 0.12.1 && < 0.14, +-- pretty >= 1.1.3.2 && < 1.5, +-- text >= 1.2.2.1 && < 1.4 hs-source-dirs: src build-tools: alex, happy ghc-options: -Wall diff --git a/src/Bang/CommandLine.hs b/src/Bang/CommandLine.hs index af59ed6..569eb6f 100644 --- a/src/Bang/CommandLine.hs +++ b/src/Bang/CommandLine.hs @@ -2,6 +2,7 @@ module Bang.CommandLine( BangCommand(..) , BangOperation(..) , LexerOptions(..) + , ParserOptions(..) , getCommand , helpString ) @@ -32,13 +33,15 @@ outputFile = strOption (short 'o' <> long "output-file" <> metavar "FILE" data BangOperation = Help | Version | Lex LexerOptions + | Parse ParserOptions deriving (Show) bangOperation :: Parser BangOperation bangOperation = subparser $ command "help" (pure Help `withInfo` "Describe common commands.") <> command "version" (pure Version `withInfo` "Display version information.") <> - command "lex" (parseLex `withInfo` "Lex a file into its component tokens.") + command "lex" (parseLex `withInfo` "Lex a file into its component tokens.") <> + command "parse" (parseParse `withInfo` "Parse a file into its AST.") withInfo :: Parser a -> String -> ParserInfo a withInfo opts desc = info (helper <*> opts) (progDesc desc) @@ -54,6 +57,17 @@ parseLex = Lex <$> parseLexOptions parseLexOptions :: Parser LexerOptions parseLexOptions = LexerOptions <$> argument str (metavar "FILE") +data ParserOptions = ParserOptions { + parseInputFile :: FilePath + } + deriving (Show) + +parseParse :: Parser BangOperation +parseParse = Parse <$> parseParseOptions + +parseParseOptions :: Parser ParserOptions +parseParseOptions = ParserOptions <$> argument str (metavar "FILE") + parseOptions :: Parser BangCommand parseOptions = BangCommand <$> verboseOption <*> outputFile <*> bangOperation diff --git a/src/Bang/Syntax/AST.hs b/src/Bang/Syntax/AST.hs index dce6500..dfc2c9b 100644 --- a/src/Bang/Syntax/AST.hs +++ b/src/Bang/Syntax/AST.hs @@ -3,18 +3,33 @@ module Bang.Syntax.AST import Data.Text.Lazy(Text) import Bang.Syntax.Location -import Bang.Syntax.Token -data Name = Name Text +data NameEnvironment = ModuleEnv | TypeEnv | VarEnv + deriving (Eq, Ord, Show) -identToName :: Located Token -> Name -identToName = undefined +data Name = Name Location NameEnvironment Word Text + deriving (Show) data Module = Module Name [Declaration] + deriving (Show) -data Declaration = TypeDeclaration - | ValueDeclaration +data Declaration = TypeDeclaration !Name !Type + | ValueDeclaration !Name !Expression + | PrimTypeDecl !PrimitiveType + deriving (Show) -data Expression = Expression +data PrimitiveType = PrimType Name Text + deriving (Show) -data Type = Type +data Expression = ConstantExp Location ConstantVal + | ReferenceExp Location Name + deriving (Show) + +data ConstantVal = ConstantInt Word Text + | ConstantChar Text + | ConstantString Text + | ConstantFloat Text + deriving (Show) + +data Type = TypeRef Location Name + deriving (Show) diff --git a/src/Bang/Syntax/Lexer.x b/src/Bang/Syntax/Lexer.x index e9ba76c..4f0f23d 100644 --- a/src/Bang/Syntax/Lexer.x +++ b/src/Bang/Syntax/Lexer.x @@ -10,6 +10,7 @@ import Bang.Syntax.Location import Bang.Syntax.Name import Bang.Syntax.Token import Data.Char(isSpace, isAscii, ord) +import Data.Int(Int64) import Data.Maybe(fromMaybe) import Data.Text.Lazy(Text) import qualified Data.Text.Lazy as T @@ -34,13 +35,13 @@ $escape_char = [abfnrtv'\"\\] -- Whitespace $white+ ; - "/*".*"*/" ; + "/*"[.\n]*"*/" ; -- Numbers - $decdigit+ { emitS (IntTok 10) } - "0x"$hexdigit+ { emitS (IntTok 16) } - "0o"$octdigit+ { emitS (IntTok 8) } - "0b"$bindigit+ { emitS (IntTok 2) } + $decdigit+ { emitI 0 (IntTok 10) } + "0x"$hexdigit+ { emitI 2 (IntTok 16) } + "0o"$octdigit+ { emitI 2 (IntTok 8) } + "0b"$bindigit+ { emitI 2 (IntTok 2) } $decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS FloatTok} $decdigit+"e""-"?$decdigit+ { emitS FloatTok} @@ -74,7 +75,9 @@ lexer src mbPos txt = go (AlexInput startPos txt) startPos = fromMaybe initialPosition mbPos go input = case alexScan input 0 of - AlexEOF -> [] + AlexEOF -> let AlexInput pos _ = input + loc = Location src pos pos + in [EOFTok `locatedAt` loc] AlexError input' -> let AlexInput pos text = input' (as, bs) = T.break isSpace text pos' = advanceWith' pos as @@ -106,4 +109,12 @@ emitS mk src len (AlexInput pos t) = token `locatedAt` loc token = mk txt loc = Location src pos (pos `advanceWith'` txt) +emitI :: Int64 -> (Text -> Token) -> AlexAction +emitI dropCount mk src len (AlexInput pos t) = token `locatedAt` loc + where + baseText = T.take (fromIntegral len) t + txt = T.drop dropCount baseText + token = mk txt + loc = Location src pos (pos `advanceWith'` baseText) + } diff --git a/src/Bang/Syntax/Location.hs b/src/Bang/Syntax/Location.hs index 25b1c74..4a13ed9 100644 --- a/src/Bang/Syntax/Location.hs +++ b/src/Bang/Syntax/Location.hs @@ -9,6 +9,7 @@ module Bang.Syntax.Location( , locSource, locStart, locEnd , Located(..) , locatedAt + , unknownLocation ) where @@ -66,3 +67,6 @@ instance Show a => Show (Located a) where locatedAt :: a -> Location -> Located a locatedAt a p = Located p a + +unknownLocation :: Location +unknownLocation = Location Unknown initialPosition initialPosition diff --git a/src/Bang/Syntax/Parser.hs b/src/Bang/Syntax/Parser.hs deleted file mode 100644 index 2821e4a..0000000 --- a/src/Bang/Syntax/Parser.hs +++ /dev/null @@ -1,534 +0,0 @@ -{-# OPTIONS_GHC -w #-} -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE OverloadedStrings #-} -{-# OPTION_GHC -w #-} -module Bang.Syntax.Parser( - parseModule - , ParseError, showError - , lexWithLayout - ) - where - -import Bang.Syntax.AST -import Bang.Syntax.Lexer -import Bang.Syntax.Location -import Bang.Syntax.Token -import Data.Map.Strict(Map) -import Data.Map.Strict as Map -import Data.Text.Lazy(Text) -import qualified Data.Text.Lazy as T -import MonadLib -import Control.Applicative(Applicative(..)) - --- parser produced by Happy Version 1.19.4 - -data HappyAbsSyn t8 t9 t10 - = HappyTerminal (Located Token) - | HappyErrorToken Int - | HappyAbsSyn4 (Module) - | HappyAbsSyn5 (Maybe Declaration) - | HappyAbsSyn6 (Type) - | HappyAbsSyn7 (Expression) - | HappyAbsSyn8 t8 - | HappyAbsSyn9 t9 - | HappyAbsSyn10 t10 - -action_0 (17) = happyShift action_2 -action_0 (4) = happyGoto action_3 -action_0 _ = happyFail - -action_1 (17) = happyShift action_2 -action_1 _ = happyFail - -action_2 (20) = happyShift action_4 -action_2 _ = happyFail - -action_3 (52) = happyAccept -action_3 _ = happyFail - -action_4 (8) = happyGoto action_5 -action_4 _ = happyReduce_7 - -action_5 (14) = happyShift action_7 -action_5 (21) = happyShift action_8 -action_5 (5) = happyGoto action_6 -action_5 _ = happyReduce_1 - -action_6 _ = happyReduce_8 - -action_7 (18) = happyShift action_11 -action_7 _ = happyFail - -action_8 (11) = happyShift action_9 -action_8 (12) = happyShift action_10 -action_8 _ = happyFail - -action_9 (6) = happyGoto action_16 -action_9 _ = happyReduce_5 - -action_10 (7) = happyGoto action_15 -action_10 _ = happyReduce_6 - -action_11 (19) = happyShift action_14 -action_11 (9) = happyGoto action_12 -action_11 (10) = happyGoto action_13 -action_11 _ = happyReduce_9 - -action_12 _ = happyReduce_4 - -action_13 (13) = happyShift action_17 -action_13 _ = happyReduce_10 - -action_14 _ = happyReduce_11 - -action_15 _ = happyReduce_3 - -action_16 _ = happyReduce_2 - -action_17 (19) = happyShift action_18 -action_17 _ = happyFail - -action_18 _ = happyReduce_12 - -happyReduce_1 = happySpecReduce_3 4 happyReduction_1 -happyReduction_1 _ - (HappyTerminal happy_var_2) - (HappyTerminal (Located happy_var_1 (ValIdent _ "module"))) - = HappyAbsSyn4 - (Module (identToName happy_var_1 happy_var_2) - ) -happyReduction_1 _ _ _ = notHappyAtAll - -happyReduce_2 = happySpecReduce_3 5 happyReduction_2 -happyReduction_2 _ - _ - _ - = HappyAbsSyn5 - (Just TypeDeclartion - ) - -happyReduce_3 = happySpecReduce_3 5 happyReduction_3 -happyReduction_3 _ - _ - _ - = HappyAbsSyn5 - (Just ValueDeclaration - ) - -happyReduce_4 = happySpecReduce_3 5 happyReduction_4 -happyReduction_4 _ - _ - _ - = HappyAbsSyn5 - (return Nothing - ) - -happyReduce_5 = happySpecReduce_0 6 happyReduction_5 -happyReduction_5 = HappyAbsSyn6 - (Type - ) - -happyReduce_6 = happySpecReduce_0 7 happyReduction_6 -happyReduction_6 = HappyAbsSyn7 - (Expression - ) - -happyReduce_7 = happySpecReduce_0 8 happyReduction_7 -happyReduction_7 = HappyAbsSyn8 - ([] - ) - -happyReduce_8 = happySpecReduce_2 8 happyReduction_8 -happyReduction_8 (HappyAbsSyn5 happy_var_2) - (HappyAbsSyn8 happy_var_1) - = HappyAbsSyn8 - (case happy_var_2 of - Nothing -> happy_var_1 - Just x -> happy_var_1 ++ [x] - ) -happyReduction_8 _ _ = notHappyAtAll - -happyReduce_9 = happySpecReduce_0 9 happyReduction_9 -happyReduction_9 = HappyAbsSyn9 - ([] - ) - -happyReduce_10 = happySpecReduce_1 9 happyReduction_10 -happyReduction_10 (HappyAbsSyn10 happy_var_1) - = HappyAbsSyn9 - (reverse happy_var_1 - ) -happyReduction_10 _ = notHappyAtAll - -happyReduce_11 = happySpecReduce_1 10 happyReduction_11 -happyReduction_11 (HappyTerminal happy_var_1) - = HappyAbsSyn10 - ([happy_var_1] - ) -happyReduction_11 _ = notHappyAtAll - -happyReduce_12 = happySpecReduce_3 10 happyReduction_12 -happyReduction_12 (HappyTerminal happy_var_3) - _ - (HappyAbsSyn10 happy_var_1) - = HappyAbsSyn10 - (happy_var_3 : happy_var_1 - ) -happyReduction_12 _ _ _ = notHappyAtAll - -happyNewToken action sts stk - = runNextToken(\tk -> - let cont i = action i i tk (HappyState action) sts stk in - case tk of { - Located initialPosition EOFTok -> action 52 52 tk (HappyState action) sts stk; - Located happy_dollar_dollar (OpIdent _ "::") -> cont 11; - Located happy_dollar_dollar (OpIdent _ "=") -> cont 12; - Located happy_dollar_dollar (OpIdent _ ",") -> cont 13; - Located happy_dollar_dollar (ValIdent _ "infixl") -> cont 14; - Located happy_dollar_dollar (ValIdent _ "infixr") -> cont 15; - Located happy_dollar_dollar (ValIdent _ "infix") -> cont 16; - Located happy_dollar_dollar (ValIdent _ "module") -> cont 17; - Located _ (IntTok _ _) -> cont 18; - Located _ (OpIdent _) -> cont 19; - Located _ (TypeIdent _) -> cont 20; - Located _ (ValIdent _) -> cont 21; - Located _ (OpIdent (LeftAssoc 0) _) -> cont 22; - Located _ (OpIdent (RightAssoc 0) _) -> cont 23; - Located _ (OpIdent (NonAssoc 0) _) -> cont 24; - Located _ (OpIdent (LeftAssoc 1) _) -> cont 25; - Located _ (OpIdent (RightAssoc 1) _) -> cont 26; - Located _ (OpIdent (NonAssoc 1) _) -> cont 27; - Located _ (OpIdent (LeftAssoc 2) _) -> cont 28; - Located _ (OpIdent (RightAssoc 2) _) -> cont 29; - Located _ (OpIdent (NonAssoc 2) _) -> cont 30; - Located _ (OpIdent (LeftAssoc 3) _) -> cont 31; - Located _ (OpIdent (RightAssoc 3) _) -> cont 32; - Located _ (OpIdent (NonAssoc 3) _) -> cont 33; - Located _ (OpIdent (LeftAssoc 4) _) -> cont 34; - Located _ (OpIdent (RightAssoc 4) _) -> cont 35; - Located _ (OpIdent (NonAssoc 4) _) -> cont 36; - Located _ (OpIdent (LeftAssoc 5) _) -> cont 37; - Located _ (OpIdent (RightAssoc 5) _) -> cont 38; - Located _ (OpIdent (NonAssoc 5) _) -> cont 39; - Located _ (OpIdent (LeftAssoc 6) _) -> cont 40; - Located _ (OpIdent (RightAssoc 6) _) -> cont 41; - Located _ (OpIdent (NonAssoc 6) _) -> cont 42; - Located _ (OpIdent (LeftAssoc 7) _) -> cont 43; - Located _ (OpIdent (RightAssoc 7) _) -> cont 44; - Located _ (OpIdent (NonAssoc 7) _) -> cont 45; - Located _ (OpIdent (LeftAssoc 8) _) -> cont 46; - Located _ (OpIdent (RightAssoc 8) _) -> cont 47; - Located _ (OpIdent (NonAssoc 8) _) -> cont 48; - Located _ (OpIdent (LeftAssoc 9) _) -> cont 49; - Located _ (OpIdent (RightAssoc 9) _) -> cont 50; - Located _ (OpIdent (NonAssoc 9) _) -> cont 51; - _ -> happyError' tk - }) - -happyError_ 52 tk = happyError' tk -happyError_ _ tk = happyError' tk - -happyThen :: () => Parser a -> (a -> Parser b) -> Parser b -happyThen = (>>=) -happyReturn :: () => a -> Parser a -happyReturn = (return) -happyThen1 = happyThen -happyReturn1 :: () => a -> Parser a -happyReturn1 = happyReturn -happyError' :: () => (Located Token) -> Parser a -happyError' tk = parseError tk - -top_module = happySomeParser where - happySomeParser = happyThen (happyParse action_0) (\x -> case x of {HappyAbsSyn4 z -> happyReturn z; _other -> notHappyAtAll }) - -happySeq = happyDontSeq - - -newtype Parser a = Parser { - unParser :: StateT ParserState (ExceptionT ParseError Id) a - } - deriving (Functor, Applicative, Monad) - -data ParseError = LexError Location Text - | ParseError Location Token - | UnexpectedEOF - deriving (Show) - -showError :: ParseError -> String -showError (LexError l t) = show l ++ ": lexer error around " ++ T.unpack t -showError (ParseError l t) = show l ++ ": parse error around " ++ showToken t -showError UnexpectedEOF = "Unexpected end of file" - -data ParserState = ParserState { - psPrecTable :: Map Text Word - , psTokenStream :: [Located Token] - } - -instance StateM Parser ParserState where - get = Parser get - set = Parser . set - -instance ExceptionM Parser ParseError where - raise = Parser . raise - -instance RunExceptionM Parser ParseError where - try m = Parser (try (unParser m)) - -runNextToken :: (Located Token -> Parser a) -> Parser a -runNextToken action = - do state <- get - case psTokenStream state of - [] -> raise UnexpectedEOF - (x : rest) -> - do set (state{ psTokenStream = rest }) - action x - -lexWithLayout :: Origin -> Position -> Text -> [Located Token] -lexWithLayout src pos txt = lexer src (Just pos) txt - -parseModule :: Origin -> Text -> Either ParseError Module -parseModule src txt = - let parserM = unParser top_module - excM = runStateT initialState (parserM :: StateT ParserState (ExceptionT ParseError Id) Module) - idM = runExceptionT (excM :: ExceptionT ParseError Id (Module, ParserState)) - resWState = runId idM - in fmap fst resWState - where - tokenStream = lexWithLayout src initialPosition txt - initialState = ParserState Map.empty tokenStream - -parseError :: Located Token -> Parser a -parseError t = - case t of - Located _ EOFTok -> raise UnexpectedEOF - Located p (ErrorTok t) -> raise (LexError p t) - Located p t -> raise (ParseError p t) -{-# LINE 1 "templates/GenericTemplate.hs" #-} --- Id: GenericTemplate.hs,v 1.26 2005/01/14 14:47:22 simonmar Exp - - - - - - - - - - - - - - - - - - - - - - - - - -infixr 9 `HappyStk` -data HappyStk a = HappyStk a (HappyStk a) - ------------------------------------------------------------------------------ --- starting the parse - -happyParse start_state = happyNewToken start_state notHappyAtAll notHappyAtAll - ------------------------------------------------------------------------------ --- Accepting the parse - --- If the current token is (1), it means we've just accepted a partial --- parse (a %partial parser). We must ignore the saved token on the top of --- the stack in this case. -happyAccept (1) tk st sts (_ `HappyStk` ans `HappyStk` _) = - happyReturn1 ans -happyAccept j tk st sts (HappyStk ans _) = - (happyReturn1 ans) - ------------------------------------------------------------------------------ --- Arrays only: do the next action - - - ------------------------------------------------------------------------------ --- HappyState data type (not arrays) - - - -newtype HappyState b c = HappyState - (Int -> -- token number - Int -> -- token number (yes, again) - b -> -- token semantic value - HappyState b c -> -- current state - [HappyState b c] -> -- state stack - c) - - - ------------------------------------------------------------------------------ --- Shifting a token - -happyShift new_state (1) tk st sts stk@(x `HappyStk` _) = - let i = (case x of { HappyErrorToken (i) -> i }) in --- trace "shifting the error token" $ - new_state i i tk (HappyState (new_state)) ((st):(sts)) (stk) - -happyShift new_state i tk st sts stk = - happyNewToken new_state ((st):(sts)) ((HappyTerminal (tk))`HappyStk`stk) - --- happyReduce is specialised for the common cases. - -happySpecReduce_0 i fn (1) tk st sts stk - = happyFail (1) tk st sts stk -happySpecReduce_0 nt fn j tk st@((HappyState (action))) sts stk - = action nt j tk st ((st):(sts)) (fn `HappyStk` stk) - -happySpecReduce_1 i fn (1) tk st sts stk - = happyFail (1) tk st sts stk -happySpecReduce_1 nt fn j tk _ sts@(((st@(HappyState (action))):(_))) (v1`HappyStk`stk') - = let r = fn v1 in - happySeq r (action nt j tk st sts (r `HappyStk` stk')) - -happySpecReduce_2 i fn (1) tk st sts stk - = happyFail (1) tk st sts stk -happySpecReduce_2 nt fn j tk _ ((_):(sts@(((st@(HappyState (action))):(_))))) (v1`HappyStk`v2`HappyStk`stk') - = let r = fn v1 v2 in - happySeq r (action nt j tk st sts (r `HappyStk` stk')) - -happySpecReduce_3 i fn (1) tk st sts stk - = happyFail (1) tk st sts stk -happySpecReduce_3 nt fn j tk _ ((_):(((_):(sts@(((st@(HappyState (action))):(_))))))) (v1`HappyStk`v2`HappyStk`v3`HappyStk`stk') - = let r = fn v1 v2 v3 in - happySeq r (action nt j tk st sts (r `HappyStk` stk')) - -happyReduce k i fn (1) tk st sts stk - = happyFail (1) tk st sts stk -happyReduce k nt fn j tk st sts stk - = case happyDrop (k - ((1) :: Int)) sts of - sts1@(((st1@(HappyState (action))):(_))) -> - let r = fn stk in -- it doesn't hurt to always seq here... - happyDoSeq r (action nt j tk st1 sts1 r) - -happyMonadReduce k nt fn (1) tk st sts stk - = happyFail (1) tk st sts stk -happyMonadReduce k nt fn j tk st sts stk = - case happyDrop k ((st):(sts)) of - sts1@(((st1@(HappyState (action))):(_))) -> - let drop_stk = happyDropStk k stk in - happyThen1 (fn stk tk) (\r -> action nt j tk st1 sts1 (r `HappyStk` drop_stk)) - -happyMonad2Reduce k nt fn (1) tk st sts stk - = happyFail (1) tk st sts stk -happyMonad2Reduce k nt fn j tk st sts stk = - case happyDrop k ((st):(sts)) of - sts1@(((st1@(HappyState (action))):(_))) -> - let drop_stk = happyDropStk k stk - - - - - - new_state = action - - in - happyThen1 (fn stk tk) (\r -> happyNewToken new_state sts1 (r `HappyStk` drop_stk)) - -happyDrop (0) l = l -happyDrop n ((_):(t)) = happyDrop (n - ((1) :: Int)) t - -happyDropStk (0) l = l -happyDropStk n (x `HappyStk` xs) = happyDropStk (n - ((1)::Int)) xs - ------------------------------------------------------------------------------ --- Moving to a new state after a reduction - - - - - - - - - -happyGoto action j tk st = action j j tk (HappyState action) - - ------------------------------------------------------------------------------ --- Error recovery ((1) is the error token) - --- parse error if we are in recovery and we fail again -happyFail (1) tk old_st _ stk@(x `HappyStk` _) = - let i = (case x of { HappyErrorToken (i) -> i }) in --- trace "failing" $ - happyError_ i tk - -{- We don't need state discarding for our restricted implementation of - "error". In fact, it can cause some bogus parses, so I've disabled it - for now --SDM - --- discard a state -happyFail (1) tk old_st (((HappyState (action))):(sts)) - (saved_tok `HappyStk` _ `HappyStk` stk) = --- trace ("discarding state, depth " ++ show (length stk)) $ - action (1) (1) tk (HappyState (action)) sts ((saved_tok`HappyStk`stk)) --} - --- Enter error recovery: generate an error token, --- save the old token and carry on. -happyFail i tk (HappyState (action)) sts stk = --- trace "entering error recovery" $ - action (1) (1) tk (HappyState (action)) sts ( (HappyErrorToken (i)) `HappyStk` stk) - --- Internal happy errors: - -notHappyAtAll :: a -notHappyAtAll = error "Internal Happy error\n" - ------------------------------------------------------------------------------ --- Hack to get the typechecker to accept our action functions - - - - - - - ------------------------------------------------------------------------------ --- Seq-ing. If the --strict flag is given, then Happy emits --- happySeq = happyDoSeq --- otherwise it emits --- happySeq = happyDontSeq - -happyDoSeq, happyDontSeq :: a -> b -> b -happyDoSeq a b = a `seq` b -happyDontSeq a b = b - ------------------------------------------------------------------------------ --- Don't inline any functions from the template. GHC has a nasty habit --- of deciding to inline happyGoto everywhere, which increases the size of --- the generated parser quite a bit. - - - - - - - - - -{-# NOINLINE happyShift #-} -{-# NOINLINE happySpecReduce_0 #-} -{-# NOINLINE happySpecReduce_1 #-} -{-# NOINLINE happySpecReduce_2 #-} -{-# NOINLINE happySpecReduce_3 #-} -{-# NOINLINE happyReduce #-} -{-# NOINLINE happyMonadReduce #-} -{-# NOINLINE happyGoto #-} -{-# NOINLINE happyFail #-} - --- end of Happy Template. - diff --git a/src/Bang/Syntax/Parser.y b/src/Bang/Syntax/Parser.y index 91abae7..6759b89 100644 --- a/src/Bang/Syntax/Parser.y +++ b/src/Bang/Syntax/Parser.y @@ -15,65 +15,71 @@ import Bang.Syntax.AST import Bang.Syntax.Lexer import Bang.Syntax.Location import Bang.Syntax.Token +import Data.Char(digitToInt) import Data.Map.Strict(Map) import Data.Map.Strict as Map +import Data.Maybe(catMaybes) import Data.Text.Lazy(Text) import qualified Data.Text.Lazy as T import MonadLib +import Debug.Trace + } +%name top_module %tokentype { Located Token } +%monad { Parser } +%error { parseError } +%lexer { runNextToken } { Located _ EOFTok } %token - '::' { Located $$ (OpIdent _ "::") } - '=' { Located $$ (OpIdent _ "=") } - ',' { Located $$ (OpIdent _ ",") } - 'infixl' { Located $$ (ValIdent "infixl") } - 'infixr' { Located $$ (ValIdent "infixr") } - 'infix' { Located $$ (ValIdent "infix") } - 'module' { Located $$ (ValIdent "module") } - Integer { Located _ (IntTok _ _) } - OpIdent { Located _ (OpIdent _ _) } - TypeIdent { Located _ (TypeIdent _) } - ValIdent { Located _ (ValIdent _) } - OPL0 { Located _ (OpIdent (LeftAssoc 0) _) } - OPR0 { Located _ (OpIdent (RightAssoc 0) _) } - OPN0 { Located _ (OpIdent (NonAssoc 0) _) } - OPL1 { Located _ (OpIdent (LeftAssoc 1) _) } - OPR1 { Located _ (OpIdent (RightAssoc 1) _) } - OPN1 { Located _ (OpIdent (NonAssoc 1) _) } - OPL2 { Located _ (OpIdent (LeftAssoc 2) _) } - OPR2 { Located _ (OpIdent (RightAssoc 2) _) } - OPN2 { Located _ (OpIdent (NonAssoc 2) _) } - OPL3 { Located _ (OpIdent (LeftAssoc 3) _) } - OPR3 { Located _ (OpIdent (RightAssoc 3) _) } - OPN3 { Located _ (OpIdent (NonAssoc 3) _) } - OPL4 { Located _ (OpIdent (LeftAssoc 4) _) } - OPR4 { Located _ (OpIdent (RightAssoc 4) _) } - OPN4 { Located _ (OpIdent (NonAssoc 4) _) } - OPL5 { Located _ (OpIdent (LeftAssoc 5) _) } - OPR5 { Located _ (OpIdent (RightAssoc 5) _) } - OPN5 { Located _ (OpIdent (NonAssoc 5) _) } - OPL6 { Located _ (OpIdent (LeftAssoc 6) _) } - OPR6 { Located _ (OpIdent (RightAssoc 6) _) } - OPN6 { Located _ (OpIdent (NonAssoc 6) _) } - OPL7 { Located _ (OpIdent (LeftAssoc 7) _) } - OPR7 { Located _ (OpIdent (RightAssoc 7) _) } - OPN7 { Located _ (OpIdent (NonAssoc 7) _) } - OPL8 { Located _ (OpIdent (LeftAssoc 8) _) } - OPR8 { Located _ (OpIdent (RightAssoc 8) _) } - OPN8 { Located _ (OpIdent (NonAssoc 8) _) } - OPL9 { Located _ (OpIdent (LeftAssoc 9) _) } - OPR9 { Located _ (OpIdent (RightAssoc 9) _) } - OPN9 { Located _ (OpIdent (NonAssoc 9) _) } - - -%monad { Parser } -%error { parseError } -%lexer { runNextToken } { Located initialPosition EOFTok } - -%name top_module + '::' { Located $$ (OpIdent _ "::") } + '=' { Located $$ (OpIdent _ "=") } + ',' { Located $$ (OpIdent _ ",") } + 'infixl' { Located $$ (ValIdent "infixl") } + 'infixr' { Located $$ (ValIdent "infixr") } + 'infix' { Located $$ (ValIdent "infix") } + 'module' { Located $$ (ValIdent "module") } + 'primitive' { Located $$ (ValIdent "primitive") } + 'type' { Located $$ (ValIdent "type") } + Integer { Located _ (IntTok _ _) } + Float { Located _ (FloatTok _) } + Char { Located _ (CharTok _) } + String { Located _ (StringTok _) } + OpIdent { Located _ (OpIdent _ _) } + TypeIdent { Located _ (TypeIdent _) } + ValIdent { Located _ (ValIdent _) } + OPL0 { Located _ (OpIdent (LeftAssoc 0) _) } + OPR0 { Located _ (OpIdent (RightAssoc 0) _) } + OPN0 { Located _ (OpIdent (NonAssoc 0) _) } + OPL1 { Located _ (OpIdent (LeftAssoc 1) _) } + OPR1 { Located _ (OpIdent (RightAssoc 1) _) } + OPN1 { Located _ (OpIdent (NonAssoc 1) _) } + OPL2 { Located _ (OpIdent (LeftAssoc 2) _) } + OPR2 { Located _ (OpIdent (RightAssoc 2) _) } + OPN2 { Located _ (OpIdent (NonAssoc 2) _) } + OPL3 { Located _ (OpIdent (LeftAssoc 3) _) } + OPR3 { Located _ (OpIdent (RightAssoc 3) _) } + OPN3 { Located _ (OpIdent (NonAssoc 3) _) } + OPL4 { Located _ (OpIdent (LeftAssoc 4) _) } + OPR4 { Located _ (OpIdent (RightAssoc 4) _) } + OPN4 { Located _ (OpIdent (NonAssoc 4) _) } + OPL5 { Located _ (OpIdent (LeftAssoc 5) _) } + OPR5 { Located _ (OpIdent (RightAssoc 5) _) } + OPN5 { Located _ (OpIdent (NonAssoc 5) _) } + OPL6 { Located _ (OpIdent (LeftAssoc 6) _) } + OPR6 { Located _ (OpIdent (RightAssoc 6) _) } + OPN6 { Located _ (OpIdent (NonAssoc 6) _) } + OPL7 { Located _ (OpIdent (LeftAssoc 7) _) } + OPR7 { Located _ (OpIdent (RightAssoc 7) _) } + OPN7 { Located _ (OpIdent (NonAssoc 7) _) } + OPL8 { Located _ (OpIdent (LeftAssoc 8) _) } + OPR8 { Located _ (OpIdent (RightAssoc 8) _) } + OPN8 { Located _ (OpIdent (NonAssoc 8) _) } + OPL9 { Located _ (OpIdent (LeftAssoc 9) _) } + OPR9 { Located _ (OpIdent (RightAssoc 9) _) } + OPN9 { Located _ (OpIdent (NonAssoc 9) _) } %right OPL0 %left OPR0 @@ -110,25 +116,65 @@ import MonadLib top_module :: { Module } : 'module' TypeIdent listopt(declaration) - { Module (identToName $2) $3 } + {% + do let Located src (TypeIdent rawName) = $2 + name <- registerName False src ModuleEnv rawName + return (Module name $3) } declaration :: { Maybe Declaration } : ValIdent '::' Type - { Just TypeDeclaration } + {% + do let Located src (ValIdent rawName) = $1 + name <- registerName True src VarEnv rawName + return (Just (TypeDeclaration name $3)) } | ValIdent '=' Expression - { Just ValueDeclaration } + {% + do let Located src (ValIdent rawName) = $1 + name <- registerName True src VarEnv rawName + return (Just (ValueDeclaration name $3)) } | 'infixl' Integer sep(',',OpIdent) {% addFixities $1 LeftAssoc $2 $3 >> return Nothing } | 'infixr' Integer sep(',',OpIdent) {% addFixities $1 RightAssoc $2 $3 >> return Nothing } | 'infix' Integer sep(',',OpIdent) {% addFixities $1 NonAssoc $2 $3 >> return Nothing } + | 'primitive' 'type' TypeIdent '=' String + {% + do let Located src (TypeIdent rawName) = $3 + Located _ (StringTok rawText) = $5 + name <- registerName False src TypeEnv rawName + return (Just (PrimTypeDecl (PrimType name rawText))) } Type :: { Type } - : {- empty -} { Type } + : TypeIdent {% + do let Located src (TypeIdent rawName) = $1 + name <- lookupName src TypeEnv rawName + return (TypeRef src name) } Expression :: { Expression } - : {- empty -} { Expression } + : BaseExpression { $1 } + +BaseExpression :: { Expression } + : OpIdent {% + do let Located src (OpIdent _ rawName) = $1 + name <- lookupName src VarEnv rawName + return (ReferenceExp src name) } + | ValIdent {% + do let Located src (ValIdent rawName) = $1 + name <- lookupName src VarEnv rawName + return (ReferenceExp src (trace "NAME" name)) } + | Integer {% + do let Located src (IntTok base val) = $1 + return (ConstantExp src (ConstantInt base val)) } + | String {% + do let Located src (StringTok val) = $1 + return (ConstantExp src (ConstantString val)) } + | Float {% + do let Located src (FloatTok val) = $1 + return (ConstantExp src (ConstantFloat val)) } + | Char {% + do let Located src (CharTok val) = $1 + return (ConstantExp src (ConstantChar val)) } -- ----------------------------------------------------------------------------- @@ -159,11 +205,12 @@ list_body(p) | list_body(p) p { $2 : $1 } listopt(p) - : {- empty -} { [] } - | listopt(p) p { case $2 of - Nothing -> $1 - Just x -> $1 ++ [x] - } + : p p p { catMaybes [$1, $2, $3] } +-- : {- empty -} { [] } +-- | listopt(p) p { case $2 of +-- Nothing -> $1 +-- Just x -> $1 ++ [x] +-- } { @@ -172,9 +219,11 @@ newtype Parser a = Parser { } deriving (Functor, Applicative, Monad) -data ParseError = LexError Location Text - | ParseError Location Token - | SemanticError Location Text +data ParseError = LexError Location Text + | ParseError Location Token + | RedefinitionError Location Location Text + | InternalError Location Text + | UnboundVariable Location Text | UnexpectedEOF deriving (Show) @@ -184,10 +233,21 @@ showError (ParseError l t) = show l ++ ": parse error around " ++ showToken t showError UnexpectedEOF = "Unexpected end of file" data ParserState = ParserState { - psPrecTable :: Map Text Word - , psTokenStream :: [Located Token] + psPrecTable :: Map Text Fixity + , psTokenStream :: [Located Token] + , psNameDatabase :: Map (NameEnvironment, Text) Name + , psNextIdent :: Word } +initialState :: [Located Token] -> ParserState +initialState tokenStream = ParserState { + psPrecTable = Map.empty + , psTokenStream = tokenStream + , psNameDatabase = Map.empty + , psNextIdent = 1 + } + + instance StateM Parser ParserState where get = Parser get set = Parser . set @@ -201,13 +261,82 @@ instance RunExceptionM Parser ParseError where addFixities :: Location -> (Word -> Fixity) -> Located Token -> [Located Token] -> Parser () -addFixities = undefined +addFixities src fixityBuilder lval names = + do value <- processInteger lval + let fixity = fixityBuilder value + forM_ names $ \ tok -> + do state <- get + name <- forceNameDefined VarEnv src tok state + let table' = Map.insert name fixity (psPrecTable state) + set state{ psPrecTable = table' } + where + processInteger x = + case x of + Located _ (IntTok base text) -> + return (makeNumeric base text 0) + _ -> + raise (InternalError src "Non-number in fixity?") + + -- + makeNumeric base text acc = + case T.uncons text of + Nothing -> acc + Just (x, rest) -> + let acc' = (acc * base) + charValue x + in makeNumeric base rest acc' + -- + charValue = fromIntegral . digitToInt + -- + tokenName t = + case t of + Located _ (TypeIdent x) -> x + Located _ (ValIdent x) -> x + Located _ (OpIdent _ x) -> x + _ -> + error "Internal error (tokenName in Parser.y)" + -- + forceNameDefined env src token state = + do let name = tokenName token + case Map.lookup (env, name) (psNameDatabase state) of + Just _ -> return name + Nothing -> raise (UnboundVariable src name) + +registerName :: Bool -> Location -> NameEnvironment -> Text -> Parser Name +registerName redefOk loc env name = + do state <- get + let key = (env, name) + db = psNameDatabase state + case Map.lookup key db of + Nothing -> + do let res = Name loc env (psNextIdent state) name + state' = state { + psNameDatabase = Map.insert key res db + , psNextIdent = 1 + psNextIdent state + } + set state' + return res + Just res | redefOk -> + return res + Just (Name origLoc _ _ _) -> + raise (RedefinitionError loc origLoc name) + +lookupName :: Location -> NameEnvironment -> Text -> Parser Name +lookupName loc env name = + do state <- get + case Map.lookup (env, name) (psNameDatabase state) of + Nothing -> + raise (UnboundVariable loc name) + Just name -> + return name runNextToken :: (Located Token -> Parser a) -> Parser a runNextToken action = do state <- get case psTokenStream state of - [] -> raise UnexpectedEOF + [] -> + raise (InternalError unknownLocation "End of stream, but no EOF?") + (eof@(Located _ EOFTok) : _) -> + action eof -- leave this on at the end of the stream (x : rest) -> do set (state{ psTokenStream = rest }) action x @@ -218,13 +347,13 @@ lexWithLayout src pos txt = lexer src (Just pos) txt parseModule :: Origin -> Text -> Either ParseError Module parseModule src txt = let parserM = unParser top_module - excM = runStateT initialState (parserM :: StateT ParserState (ExceptionT ParseError Id) Module) + excM = runStateT (initialState tokenStream) + (parserM :: StateT ParserState (ExceptionT ParseError Id) Module) idM = runExceptionT (excM :: ExceptionT ParseError Id (Module, ParserState)) resWState = runId idM in fmap fst resWState where tokenStream = lexWithLayout src initialPosition txt - initialState = ParserState Map.empty tokenStream parseError :: Located Token -> Parser a parseError t = diff --git a/src/Main.hs b/src/Main.hs index 969880d..70d0bb8 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -12,7 +12,8 @@ import System.IO.Error(isDoesNotExistError) main :: IO () main = getCommand >>= \ cmd -> case cmdCommand cmd of - Lex o -> runLexer cmd o + Lex o -> runLexer cmd o + Parse o -> runParser cmd o Help -> putStrLn helpString Version -> putStrLn ("Bang tool, version " ++ showVersion version) @@ -21,9 +22,17 @@ runLexer _cmd opts = do let path = lexInputFile opts mtxt <- tryJust (guard . isDoesNotExistError) (T.readFile path) case mtxt of - Left _ -> fail ("Unable to opten file: " ++ path) + Left _ -> fail ("Unable to open file: " ++ path) Right txt -> do let tokens = lexer (File path) (Just initialPosition) txt mapM_ (putStrLn . show) tokens - +runParser :: BangCommand -> ParserOptions -> IO () +runParser _cmd opts = + do let path = parseInputFile opts + mtxt <- tryJust (guard . isDoesNotExistError) (T.readFile path) + case mtxt of + Left _ -> fail ("Unable to open file: " ++ path) + Right txt -> + do let mod = parseModule (File path) txt + putStrLn (show mod) diff --git a/test.bang b/test.bang new file mode 100644 index 0000000..09aaf83 --- /dev/null +++ b/test.bang @@ -0,0 +1,8 @@ +module Test + +primitive type Word = "u64" + +/* This is a number! */ +one :: Word +one = 1 +