commit deec00d5e05125a4922a8c2012aadaf996162d39 Author: Adam Wick Date: Wed Dec 29 12:23:16 2010 -0800 Initial import. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b44da76 --- /dev/null +++ b/Makefile @@ -0,0 +1,40 @@ +GHC ?= ghc +GHC_FLAGS ?= -Wall -i$(TOPDIR)/hsrc -XMultiParamTypeClasses +ALEX ?= alex +ALEX_FLAGS ?= +HAPPY ?= happy +HAPPY_FLAGS ?= +SED ?= sed +FIND ?= find +RM ?= rm +PACKAGES = monadLib bytestring utf8-string + +TARGET ?= bang +TOPDIR := $(shell pwd) +CURDIR := $(TOPDIR) +GHC_PACKAGES := $(addprefix -package ,$(PACKAGES)) +OBJECTS := + +.PHONY: all +all: $(TARGET) + +include $(CURDIR)/mk/build.mk +include $(CURDIR)/hsrc/Makefile + +LIBS := monadLib +DEPENDS := $(HS_SOURCES:.hs=.d) + +$(TARGET): $(OBJECTS) + $(call cmd,ghc_ld) $(GHC_PACKAGES) + +.PHONY: clean +clean: + $(FIND) . -name '*.d' -delete + $(FIND) . -name '*.hi' -delete + $(FIND) . -name '*.o' -delete + $(RM) -f $(TARGET) $(OBJECTS) $(EXTRA_CLEAN) + +foo: + @echo $(DEPENDS) + +-include $(DEPENDS) diff --git a/bsrc/Data/List.hs b/bsrc/Data/List.hs new file mode 100644 index 0000000..3fe3f48 --- /dev/null +++ b/bsrc/Data/List.hs @@ -0,0 +1,24 @@ +module Data.List + +data List a = Null | (:) a (List a) + +(++) :: [a] -> [a] -> [a] +a ++ [] = a +[] ++ b = b +(af:ar) ++ b = af:(ar ++ b) + +null :: [a] -> Bool +null [] = True +null _ = False + +length :: [a] -> Word +length [] = 0 +length (a:rest) = 1 + length rest + +reverse :: [a] -> [a] +reverse xs = helper xs [] + where + helper [] acc = acc + helper (a:rest) acc = helper rest (a:acc) + + diff --git a/hsrc/Main.hs b/hsrc/Main.hs new file mode 100644 index 0000000..5a6a69c --- /dev/null +++ b/hsrc/Main.hs @@ -0,0 +1,27 @@ +import Control.Exception +import Control.Monad +import qualified Data.ByteString as S +import System.Environment +import System.Exit +import System.IO.Error + +import Syntax.AST +import Syntax.Parser +import Syntax.ParserCore + +main :: IO () +main = do + [file] <- getArgs + ast <- loadModule file + putStrLn "Successful parse!" + putStrLn (show ast) + +loadModule :: FilePath -> IO (Module ()) +loadModule path = do + mtxt <- tryJust (guard . isDoesNotExistError) $ S.readFile path + case mtxt of + Left _ -> fail $ "Unable to open file: " ++ path + Right txt -> + case runParser path txt parseModule of + Left err -> printError err >> exitWith (ExitFailure 1) + Right ast -> return ast diff --git a/hsrc/Makefile b/hsrc/Makefile new file mode 100644 index 0000000..121a5c8 --- /dev/null +++ b/hsrc/Makefile @@ -0,0 +1,9 @@ +CURDIR := $(TOPDIR)/hsrc + +HSRC_FILES := Main + +HSRC_FILES_PREFIXED := $(addprefix $(CURDIR)/,$(HSRC_FILES)) +OBJECTS += $(addsuffix .o,$(HSRC_FILES_PREFIXED)) +HS_SOURCES += $(addsuffix .hs,$(HSRC_FILES_PREFIXED)) + +include $(CURDIR)/Syntax/Makefile diff --git a/hsrc/Syntax/AST.hs b/hsrc/Syntax/AST.hs new file mode 100644 index 0000000..16c9871 --- /dev/null +++ b/hsrc/Syntax/AST.hs @@ -0,0 +1,63 @@ +module Syntax.AST where + +data Show a => Module a = Module { + modName :: QualifiedName + , modImports :: [Import] + , modDecls :: [Decl a] + } + deriving (Show) + +data QualifiedName = QualifiedName { + qnPrefixes :: [String] + , qnName :: String + } + deriving (Show) + +data Import = Import { + imName :: QualifiedName + , imQualified :: Bool + , imList :: Maybe [ImportName] + , imAs :: Maybe QualifiedName + } + deriving (Show) + +data ImportName = ImportNamed QualifiedName + | ImportRenamed QualifiedName QualifiedName + deriving (Show) + +data Show a => Decl a = + DeclData [Type] QualifiedName [QualifiedName] [DataClause] + | DeclType + | DeclNewtype + | DeclClass + | DeclInstance + | DeclValue Type QualifiedName (Expr a) + deriving (Show) + +data DataClause = DataClause QualifiedName [Type] + deriving (Show) + +data Show a => Expr a = + Const a ConstVal + | VarRef a QualifiedName + | Cond a (Expr a) (Expr a) + | App a (Expr a) (Expr a) + | Block a [Expr a] + | Lambda a [QualifiedName] (Expr a) + deriving (Show) + +data Kind = Star | KFun Kind Kind + deriving (Eq,Show) + +data Type = TVar QualifiedName Kind + | TCon QualifiedName Kind + | TAp Type Type + | TGen Int + deriving (Show) + +data ConstVal = ConstInteger Int String + | ConstFloat String + | ConstChar String + | ConstString String + | ConstEmpty + deriving (Show) diff --git a/hsrc/Syntax/Lexer.x b/hsrc/Syntax/Lexer.x new file mode 100644 index 0000000..0eecc9b --- /dev/null +++ b/hsrc/Syntax/Lexer.x @@ -0,0 +1,125 @@ +{ +{-# LANGUAGE DeriveDataTypeable #-} +{-# OPTIONS -w #-} +module Syntax.Lexer where + +import qualified Codec.Binary.UTF8.Generic as UTF8 +import qualified Data.ByteString as S +import MonadLib + +import Syntax.ParserCore + +} + +-- Digits +$decdigit = 0-9 +$hexdigit = [0-9a-fA-f] +$octdigit = 0-7 +$bindigit = [01] + +-- Identifier Characters +$typestart = [A-Z\_] +$valstart = [a-z\_] +$identrest = [a-zA-Z0-9\_] +$opident = [\~\!\@\#\$\%\^\&\*\+\-\=\.\:\<\>\?\/\_] +$escape_char = [abfnrtv'\"\\] + +:- + +-- Whitespace + $white+ ; + "--".* ; + +-- Numbers + $decdigit+ { emitS (buildInt 10) } + "0x"$hexdigit+ { emitS (buildInt 16) } + "0o"$octdigit+ { emitS (buildInt 8) } + "0b"$bindigit+ { emitS (buildInt 2) } + $decdigit+"."$decdigit+ ("e""-"?$decdigit+)? { emitS TokFloat} + $decdigit+"e""-"?$decdigit+ { emitS TokFloat} + +-- Identifier + $typestart $identrest* { emitS TokTypeIdent } + $valstart $identrest* { emitS TokValIdent } + $opident+ { emitS TokOpIdent } + +-- Characters and Strings + ['].['] { emitS TokChar } + ['] [\\] $escape_char ['] { emitS TokChar } + [\"] ([^\"] | [\n] | ([\\] $escape_char))* [\"] { emitS TokString } + +-- Symbols + "(" { emitT LParen } + ")" { emitT RParen } + "[" { emitT LSquare } + "]" { emitT RSquare } + "{" { emitT LBrace } + "}" { emitT RBrace } + "|" { emitT Bar } + ";" { emitT Semi } + "," { emitT Comma } + +{ + +type AlexInput = (Position,Char,S.ByteString) + +emitT :: Token -> AlexInput -> Int -> Parser Lexeme +emitT tok (pos,_,_) _ = return $! Lexeme pos tok + +emitS :: (String -> Token) -> AlexInput -> Int -> Parser Lexeme +emitS mk (pos,c,bs) len = return $! Lexeme pos (mk input) + where input = UTF8.toString (S.take len bs) + +scan :: Parser Lexeme +scan = do + inp@(pos,_,_) <- alexGetInput + sc <- alexGetStartCode + case alexScan inp sc of + AlexEOF -> return $! Lexeme pos TokEOF + AlexError inp' -> do + let posStr = pprtPosition pos + alexError $ posStr ++ ": Lexical error." + AlexSkip inp' len' -> alexSetInput inp' >> scan + AlexToken inp' len action -> do + alexSetInput inp' + action inp len + +alexGetInput :: Parser AlexInput +alexGetInput = do + s <- get + return (psPos s, psChar s, psInput s) + +alexSetInput :: AlexInput -> Parser () +alexSetInput (pos,c,bs) = do + s <- get + set $! s { + psPos = pos + , psChar = c + , psInput = bs + } + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (p,_,bs) = do + (c,bs') <- UTF8.uncons bs + return (c, (movePos p c, c, bs')) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_,c,_) = c + +alexError :: String -> Parser a +alexError = raiseL + +alexGetStartCode :: Parser Int +alexGetStartCode = psLexCode `fmap` get + +alexSetStartCode :: Int -> Parser () +alexSetStartCode code = do + s <- get + set $! s { psLexCode = code } + +begin code _ _ = alexSetStartCode code >> scan + +buildInt :: Int -> String -> Token +buildInt base val = TokInt (base, val) + +} diff --git a/hsrc/Syntax/Makefile b/hsrc/Syntax/Makefile new file mode 100644 index 0000000..88bca0d --- /dev/null +++ b/hsrc/Syntax/Makefile @@ -0,0 +1,14 @@ +CURDIR := $(TOPDIR)/hsrc/Syntax + +SYNFILES := Lexer ParserCore AST Parser + +SYNFILES_PREFIXED := $(addprefix $(CURDIR)/,$(SYNFILES)) +OBJECTS += $(addsuffix .o,$(SYNFILES_PREFIXED)) +HS_SOURCES += $(addsuffix .hs,$(SYNFILES_PREFIXED)) + +EXTRA_CLEAN += $(CURDIR)/Lexer.hs $(CURDIR)/Lexer.info \ + $(CURDIR)/Parser.hs $(CURDIR)/Parser.info + +.SECONDARY: $(CURDIR)/Lexer.hs $(CURDIR)/Parser.hs + +$(CURDIR)/Parser.d: $(CURDIR)/Lexer.d diff --git a/hsrc/Syntax/Parser.y b/hsrc/Syntax/Parser.y new file mode 100644 index 0000000..61affd5 --- /dev/null +++ b/hsrc/Syntax/Parser.y @@ -0,0 +1,245 @@ +{ +{-# OPTIONS_GHC -w #-} + +-- vim: filetype=haskell + +module Syntax.Parser where + +import Syntax.AST +import Syntax.Lexer +import Syntax.ParserCore + +import MonadLib +import qualified Codec.Binary.UTF8.Generic as UTF8 + +} + +%token + +-- reserved words + 'module' { Lexeme $$ (TokValIdent "module" ) } + 'export' { Lexeme $$ (TokValIdent "export" ) } + 'import' { Lexeme $$ (TokValIdent "import" ) } + 'data' { Lexeme $$ (TokValIdent "data" ) } + 'type' { Lexeme $$ (TokValIdent "type" ) } + 'newtype' { Lexeme $$ (TokValIdent "newtype" ) } + 'class' { Lexeme $$ (TokValIdent "class" ) } + 'instance' { Lexeme $$ (TokValIdent "instance") } + 'qualified' { Lexeme $$ (TokValIdent "instance") } + 'as' { Lexeme $$ (TokValIdent "instance") } + +-- symbols + '=' { Lexeme $$ (TokOpIdent "=") } + '->' { Lexeme $$ (TokOpIdent "->") } + '=>' { Lexeme $$ (TokOpIdent "=>") } + '\\' { Lexeme $$ (TokOpIdent "\\") } + '(' { Lexeme $$ LParen } + ')' { Lexeme $$ RParen } + '[' { Lexeme $$ LSquare } + ']' { Lexeme $$ RSquare } + '{' { Lexeme $$ LBrace } + '}' { Lexeme $$ RBrace } + '|' { Lexeme $$ Bar } + ';' { Lexeme $$ Semi } + ',' { Lexeme $$ Comma } + +-- identifiers + TYPE_IDENT { Lexeme _ (TokTypeIdent $$) } + VAL_IDENT { Lexeme _ (TokValIdent $$) } + OP_IDENT { Lexeme _ (TokOpIdent $$) } + +-- values + INTVAL { Lexeme _ (TokInt $$) } + FLOATVAL { Lexeme _ (TokFloat $$) } + CHARVAL { Lexeme _ (TokChar $$) } + STRVAL { Lexeme _ (TokString $$) } + +%monad { Parser } { (>>=) } { return } +%name parseModule top_module +%tokentype { Lexeme } + +%lexer { lexer } { Lexeme initPosition TokEOF } + +%% + +top_module :: { Module () } : 'module' TYPE_IDENT module_decls { + let (imports,items) = $3 + in Module (makeQualified $2) imports items + } + +module_decls :: { ([Import], [Decl ()]) } + : module_decls module_decl { $1 `pappend` $2 } + | module_decl { $1 } + +module_decl :: { ([Import], [Decl ()]) } + : data_decl { ([], [$1]) } + | type_decl { ([], [$1]) } + | newtype_decl { ([], [$1]) } + | class_decl { ([], [$1]) } + | instance_decl { ([], [$1]) } + | value_decl { ([], [$1]) } + | import_decl { ([$1], [] ) } + +-- Data Declarations -------------------------------------------------------- + +data_decl :: { Decl () } + : 'data' mqualifiers TYPE_IDENT data_args dataclauses + { DeclData $2 (makeQualified $3) $4 $5 } + +mqualifiers :: { [Type] } + : { [] } + | '(' tqualifiers ')' '=>' { $2 } + +tqualifiers :: { [Type] } + : tqualifier { [$1] } + | tqualifiers ',' tqualifier { $1 ++ [$3] } + +tqualifier :: { Type } + : TYPE_IDENT VAL_IDENT + { TAp (TCon (makeQualified $1) Star) (TVar (makeQualified $2) Star) } + | tqualifier VAL_IDENT + { TAp $1 (TVar (makeQualified $2) Star) } + +data_args :: { [QualifiedName] } + : { [] } + | data_args VAL_IDENT { $1 ++ [makeQualified $2] } + +dataclauses :: { [DataClause] } + : '=' dataclause { [$2] } + | dataclauses '|' dataclause { $1 ++ [$3] } + +dataclause :: { DataClause } + : TYPE_IDENT { DataClause (makeQualified $1) [] } + | dataclause bangtype { let DataClause name items = $1 + in DataClause name (items ++ [$2]) } + +-- Type alias Declarations -------------------------------------------------- + +type_decl :: { Decl () } + : 'type' { DeclType } + +newtype_decl :: { Decl () } + : 'newtype' { DeclNewtype } + +class_decl :: { Decl () } + : 'class' { DeclClass } + +instance_decl :: { Decl () } + : 'instance' { DeclInstance } + +value_decl :: { Decl () } + : VAL_IDENT '=' expr { DeclValue undefined (makeQualified $1) $3 } + +import_decl :: { Import } + : 'import' mqualified TYPE_IDENT mimport_list mas + { Import (makeQualified $3) $2 $4 $5 } + +mqualified :: { Bool } + : { False } + | 'qualified' { True } + +mimport_list :: { Maybe [ImportName] } + : { Nothing } + | '(' ')' { Just [] } + | '(' import_list ')' { Just $2 } + +mas :: { Maybe QualifiedName } + : { Nothing } + | 'as' TYPE_IDENT { Just (makeQualified $2) } + +import_list :: { [ImportName] } + : import_name { [$1] } + | import_list ',' import_name { $1 ++ [$3] } + +import_name :: { ImportName } + : either_ident { ImportNamed $1 } + | either_ident 'as' either_ident { ImportRenamed $1 $3 } + +either_ident :: { QualifiedName } + : TYPE_IDENT { makeQualified $1 } + | VAL_IDENT { makeQualified $1 } + +-- Types -------------------------------------------------------------------- + +bangtype :: { Type } + : bangtype1 { $1 } + +bangtype1 :: { Type } + : bangtype1 VAL_IDENT { TAp $1 (TVar (makeQualified $2) Star) } + | bangtype2 { $1 } + +bangtype2 :: { Type } + : bangtype2 '->' bangtype3 + { TAp (TAp (TCon (QualifiedName [] "->") Star) $1) $3 } + | bangtype3 { $1 } + +bangtype3 :: { Type } + : TYPE_IDENT { TVar (makeQualified $1) Star } + | VAL_IDENT { TVar (makeQualified $1) Star } + | '(' bangtype ')' { $2 } + +-- Expressions -------------------------------------------------------------- + +expr :: { Expr () } + : '\\' arglist '->' expr1 { Lambda () $2 $4 } + | expr1 { $1 } + +arglist :: { [QualifiedName] } + : VAL_IDENT { [makeQualified $1] } + | arglist VAL_IDENT { $1 ++ [makeQualified $2] } + +expr1 :: { Expr () } + : '{' exprs '}' { Block () $2 } + | expr2 { $1 } + +exprs :: { [Expr ()] } + : expr ';' { [$1] } + | exprs expr ';' { $1 ++ [$2] } + +expr2 :: { Expr () } + : '[' list_exprs ']' { unwindList $2 } + | expr3 { $1 } + +list_exprs :: { [Expr ()] } + : { [] } + | list_exprs ',' expr3 { $1 ++ [$3] } + +expr3 :: { Expr () } + : bottom_expr { $1 } + +bottom_expr :: { Expr () } + : INTVAL { let (b,v) = $1 in Const () (ConstInteger b v) } + | FLOATVAL { Const () (ConstFloat $1) } + | CHARVAL { Const () (ConstChar $1) } + | STRVAL { Const () (ConstString $1) } + | VAL_IDENT { VarRef () (makeQualified $1) } + | '(' expr ')' { $2 } + +{ +lexer :: (Lexeme -> Parser a) -> Parser a +lexer k = scan >>= k + +happyError :: Parser a +happyError = raiseP "Parse Error" + +pappend :: ([a],[b]) -> ([a],[b]) -> ([a],[b]) +pappend (a,b) (c,d) = (a++c,b++d) + +unwindList :: [Expr ()] -> Expr () +unwindList [] = Const () ConstEmpty +unwindList (a:rest) = + App () (App () (VarRef () (QualifiedName ["Data","List"] ":")) a) + (unwindList rest) + +makeQualified :: String -> QualifiedName +makeQualified str = QualifiedName prefixes name + where + (prefixes,name) = loop str + loop val = + let (pre,rest) = span (/= '.') val + in if rest == "" + then ([], pre) + else let (pres, name) = loop rest + in (pre:pres, name) +} + diff --git a/hsrc/Syntax/ParserCore.hs b/hsrc/Syntax/ParserCore.hs new file mode 100644 index 0000000..6f6bccd --- /dev/null +++ b/hsrc/Syntax/ParserCore.hs @@ -0,0 +1,140 @@ +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +module Syntax.ParserCore where + +import Control.Applicative(Applicative) +import qualified Data.ByteString as S +import MonadLib + +-- -------------------------------------------------------------------------- +-- Positions +-- + +data Position = Position { + posOff :: !Int + , posLine :: !Int + , posCol :: !Int + , posFile :: !FilePath + } + deriving (Show) + +initPosition :: FilePath -> Position +initPosition = Position 0 1 1 + +movePos :: Position -> Char -> Position +movePos (Position o l c f) '\t' = Position (o+1) l (c+8) f +movePos (Position o l _ f) '\n' = Position (o+1) (l+1) 0 f +movePos (Position o l c f) _ = Position (o+1) l (c+1) f + +pprtPosition :: Position -> String +pprtPosition p = posFile p ++ ":" ++ show (posLine p) ++ ":" ++ show (posCol p) + +-- -------------------------------------------------------------------------- +-- Tokens +-- + +data Token = LParen | RParen + | LSquare | RSquare + | LBrace | RBrace + | Bar | Semi | Comma + | TokTypeIdent String + | TokValIdent String + | TokOpIdent String + | TokInt (Int,String) + | TokFloat String + | TokChar String + | TokString String + | TokEOF + deriving (Eq, Show) + +-- -------------------------------------------------------------------------- +-- Lexemes +-- + +data Lexeme = Lexeme { + lexPos :: !Position + , lexTok :: Token + } + deriving (Show) + +instance Eq Lexeme where + a == b = lexTok a == lexTok b + +-- -------------------------------------------------------------------------- +-- Errors +-- + +data ErrorType = + LexerError + | ParserError + deriving (Show) + +data Error = Error ErrorType String Position + deriving (Show) + +printError :: Error -> IO () +printError (Error etype str pos) = putStrLn errstr + where + errstr = pprtPosition pos ++ ":" ++ etypeStr ++ ": " ++ str + etypeStr = case etype of + LexerError -> "LEX" + ParserError -> "PARSE" + +-- -------------------------------------------------------------------------- +-- ParserState +-- + +data ParserState = ParserState { + psInput :: !S.ByteString + , psChar :: !Char + , psPos :: !Position + , psLexCode :: !Int + } + deriving (Show) + +initParserState :: FilePath -> S.ByteString -> ParserState +initParserState path bs = ParserState { + psInput = bs + , psChar = '\n' + , psPos = initPosition path + , psLexCode = 0 + } + +-- -------------------------------------------------------------------------- +-- Parser +-- + +newtype Parser a = Parser { + unParser :: StateT ParserState (ExceptionT Error Id) a + } deriving (Functor, Applicative, Monad) + +instance StateM Parser ParserState where + get = Parser get + set = Parser . set + +instance ExceptionM Parser Error where + raise = Parser . raise + +instance RunExceptionM Parser Error where + try m = Parser (try (unParser m)) + +-- |Raise a lexer error +raiseL :: String -> Parser a +raiseL msg = do + st <- get + raise (Error LexerError msg (psPos st)) + +-- |Raise a parser error +raiseP :: String -> Parser a +raiseP msg = do + st <- get + raise (Error ParserError msg (psPos st)) + +-- |Run the parser over the given file +runParser :: FilePath -> S.ByteString -> Parser a -> Either Error a +runParser path bs (Parser m) = + case runM m (initParserState path bs) of + Right (a,_) -> Right a + Left err -> Left err + diff --git a/mk/build.mk b/mk/build.mk new file mode 100644 index 0000000..60cc8b3 --- /dev/null +++ b/mk/build.mk @@ -0,0 +1,42 @@ +ifeq ($(V),) + quiet = quiet_ + Q = @ +else + quiet = + Q = +endif + +echo-cmd = $(if $($(quiet)cmd_$(1)), echo " $($(quiet)cmd_$(1))";) +cmd = @$(echo-cmd) $(cmd_$(1)) + +# ghc +cmd_ghc_o_hs = $(GHC) $(GHC_FLAGS) -c $< +quiet_cmd_ghc_o_hs = GHC $(notdir $@) +%.o : %.hs + $(call cmd,ghc_o_hs) +%.hi : %.o + @: + +# ghc-depends +cmd_ghc_d_hs = $(GHC) $(GHC_FLAGS) -M -dep-makefile $@ $< +quiet_cmd_ghc_d_hs = DEPEND $(notdir $@) +%.d : %.hs + $(call cmd,ghc_d_hs) + @$(SED) -i "s!: hsrc!: $(TOPDIR)/hsrc!g" $@ + +# ghc-ld +cmd_ghc_ld = $(GHC) -o $@ $^ +quiet_cmd_ghc_ld = LD $(notdir $@) + +# alex +cmd_alex_hs_x = $(ALEX) $(ALEXFLAGS) -i -o $@ $< +quiet_cmd_alex_hs_x = ALEX $(notdir $@) +%.hs : %.x + $(call cmd,alex_hs_x) + +# happy +cmd_happy_hs_y = $(HAPPY) $(HAPPYFLAGS) -i -o $@ $< +quiet_cmd_happy_hs_y = HAPPY $(notdir $@) +%.hs : %.y + $(call cmd,happy_hs_y) +