{-# LANGUAGE TypeOperators, FlexibleInstances #-} -- | -- This module provides the same functions and combinators as "GLL.Combinators.Interface". -- The only difference is that the combinators of this module construct only symbol expressions ('SymbExpr'/'BNF'). -- The combinators are therefore easier to use: they are just as freely combined but with simpler types and simpler type-errors. -- However, the the underlying grammars are binarised, resulting in slower parsing. module GLL.Combinators.BinaryInterface ( -- * Elementary parsers term_parser, satisfy, -- ** Elementary parsers using the 'Token' datatype keychar, keyword, int_lit, float_lit, bool_lit, char_lit, string_lit, alt_id_lit, id_lit, token, -- ** Elementary character-level parsers char, -- * Elementary combinators -- *** Sequencing (<**>), -- *** Choice (<||>), -- *** Semantic actions (<$$>), -- *** Nonterminal introduction (<:=>),(<::=>),chooses,chooses_prec, -- * Types -- ** Grammar (combinator expression) types BNF, SymbExpr, toSymb, mkRule, -- ** Parseable token types Token(..), Parseable(..), SubsumesToken(..), unlexTokens, unlexToken, -- * Running a parser grammarOf, parse, printParseData, evaluatorWithParseData, -- ** Running a parser with options parseWithOptions, parseWithParseOptions, printParseDataWithOptions, evaluatorWithParseDataAndOptions,printGrammarData, -- *** Possible options CombinatorOptions, CombinatorOption, GLL.Combinators.Options.maximumErrors, throwErrors, maximumPivot, maximumPivotAtNt, leftBiased, -- **** Parser options fullSPPF, allNodes, packedNodesOnly, strictBinarisation, GLL.Parser.noSelectTest, -- *** Running a parser with options and explicit failure parseWithOptionsAndError, parseWithParseOptionsAndError, -- ** Runing a parser to obtain 'ParseResult'. parseResult, parseResultWithOptions,ParseResult(..), -- ** Builtin lexers. default_lexer, -- *** Lexer settings lexer, LexerSettings(..), emptyLanguage, -- * Derived combinators mkNt, -- *** Ignoring semantic results (<$$), (**>), (<**), -- *** EBNF patterns optional, preferably, reluctantly, optionalWithDef, multiple, multiple1, multipleSepBy, multipleSepBy1, multipleSepBy2, within, parens, braces, brackets, angles, -- *** Disambiguation (<:=), (<::=),(<<<**>), (<**>>>), (<<**>), (<<<**), (**>>>), (<**>>), longest_match,shortest_match, many, many1, some, some1, manySepBy, manySepBy1, manySepBy2, someSepBy, someSepBy1,someSepBy2, -- * Memoisation memo, newMemoTable, memClear, MemoTable, MemoRef, useMemoisation, ) where import GLL.Combinators.Interface hiding (within, (**>), (<**>), (<**), (<<<**>), (<<<**), (**>>>), (<**>>>), satisfy, (<||>), (<||), (||>), (<$$>), (<$$), (<:=>), (<:=),(<::=>), (<::=), mkNt, manySepBy, manySepBy1, manySepBy2, multiple, multipleSepBy, many, multipleSepBy1, multipleSepBy2, someSepBy, someSepBy1, someSepBy2, some, memo, some1, many1, multiple1, shortest_match, longest_match, (<**>>), (<<**>), angles, braces, brackets, parens, within, optional, optionalWithDef, preferably, reluctantly, chooses, chooses_prec) import qualified GLL.Combinators.Interface as IF import GLL.Combinators.Options import GLL.Parser hiding (parse, parseWithOptions) infixl 2 <:=> -- | -- Form a rule by giving the name of the left-hand side of the new rule. -- Use this combinator on recursive non-terminals. (<:=>) :: (Show t, Ord t) => String -> BNF t a -> BNF t a n <:=> p = n IF.<:=> p infixl 2 <::=> -- | -- Variant of '<:=>' for recursive non-terminals that have a potentially infinite -- number of derivations for some input string. -- -- A non-terminal yields infinitely many derivations -- if and only if it is left-recursive and would be -- left-recursive if all the right-hand sides of the productions of the -- grammar are reversed. (<::=>) :: (Show t, Ord t) => String -> BNF t a -> BNF t a n <::=> p = n IF.<::=> p -- | Variant of '<::=>' that can be supplied with a list of alternates chooses :: (Show t, Ord t) => String -> [BNF t a] -> BNF t a chooses p alts = IF.chooses p alts -- | Variant of '<::=' that can be supplied with a list of alternates chooses_prec :: (Show t, Ord t) => String -> [BNF t a] -> BNF t a chooses_prec p alts = IF.chooses_prec p alts infixl 4 <$$> -- | -- Form an 'AltExpr' by mapping some semantic action overy the result -- of the second argument. (<$$>) :: (Show t, Ord t) => (a -> b) -> BNF t a -> BNF t b f <$$> p' = IF.toSymb (f IF.<$$> p') infixl 4 <**>,<<<**>,<**>>> -- | -- Add a 'SymbExpr' to the right-hand side represented by an 'AltExpr' -- creating a new 'AltExpr'. -- The semantic result of the first argument is applied to the second -- as a cross-product. (<**>) :: (Show t, Ord t) => BNF t (a -> b) -> BNF t a -> BNF t b pl' <**> pr' = IF.toSymb (pl' IF.<**> pr') -- | Variant of '<**>' that applies longest match on the left operand. (<**>>>) :: (Show t, Ord t) => BNF t (a -> b) -> BNF t a -> BNF t b pl' <**>>> pr' = IF.toSymb (pl' IF.<**>>> pr') -- | Variant of '<**>' that applies shortest match on the left operand. (<<<**>) :: (Show t, Ord t) => BNF t (a -> b) -> BNF t a -> BNF t b pl' <<<**> pr' = IF.toSymb (pl' IF.<<<**> pr') infixr 3 <||> -- | -- Add an 'AltExpr' to a list of 'AltExpr' -- The resuling '[] :. AltExpr' forms the right-hand side of a rule. (<||>) :: (Show t, Ord t) => BNF t a -> BNF t a -> BNF t a l' <||> r' = IF.toSymb (l' IF.<||> r') -- | -- Apply this combinator to an alternative to turn all underlying occurrences -- of '<**>' (or variants) apply 'longest match'. longest_match :: (Show t, Ord t) => BNF t a -> BNF t a longest_match isalt = IF.toSymb (IF.longest_match isalt) -- Apply this combinator to an alternative to turn all underlying occurrences -- of '<**>' (or variants) apply 'shortest match'. shortest_match :: (Show t, Ord t) => BNF t a -> BNF t a shortest_match isalt = IF.toSymb (IF.shortest_match isalt) -- | The empty right-hand side that yields its -- first argument as a semantic result. satisfy :: (Show t, Ord t ) => a -> BNF t a satisfy a = IF.toSymb (IF.satisfy a) -- | -- This function memoises a parser, given: -- -- * A 'MemoRef' pointing to a fresh 'MemoTable', created using 'newMemoTable'. -- * The 'SymbExpr' to memoise. -- -- Use 'memo' on those parsers that are expected to derive the same -- substring multiple times. If the same combinator expression is used -- to parse multiple times the 'MemoRef' needs to be cleared using 'memClear'. -- -- 'memo' relies on 'unsafePerformIO' and is therefore potentially unsafe. -- The option 'useMemoisation' enables memoisation. -- It is off by default, even if 'memo' is used in a combinator expression. memo :: (Ord t, Show t) => MemoRef [a] -> BNF t a -> BNF t a memo ref p' = IF.memo ref p' -- | -- Helper function for defining new combinators. -- Use 'mkNt' to form a new unique non-terminal name based on -- the symbol of a given 'SymbExpr' and a 'String' that is unique to -- the newly defined combinator. mkNt :: (Show t, Ord t) => BNF t a -> String -> String mkNt p str = IF.mkNt p str -- | -- Variant of '<$$>' that ignores the semantic result of its second argument. (<$$) :: (Show t, Ord t) => b -> BNF t a -> BNF t b f <$$ p = const f <$$> p infixl 4 <$$ -- | infixl 4 **>, <<**>, **>>> -- | -- Variant of '<**>' that ignores the semantic result of the first argument. (**>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t b l **> r = flip const <$$> l <**> r -- Variant of '<**>' that applies longest match on its left operand. (**>>>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t b l **>>> r = flip const <$$> l <**>>> r -- Variant of '<**>' that ignores shortest match on its left operand. (<<**>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t b l <<**>r = flip const <$$> l <<<**> r infixl 4 <**, <<<**, <**>> -- | -- Variant of '<**>' that ignores the semantic result of the second argument. (<**) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t a l <** r = const <$$> l <**> r -- | Variant of '<**' that applies longest match on its left operand. (<**>>) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t a l <**>> r = const <$$> l <**>>> r -- | Variant '<**' that applies shortest match on its left operand (<<<**) :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t a l <<<** r = const <$$> l <<<**> r -- | -- Variant of '<::=>' that prioritises productions from left-to-right (or top-to-bottom). x <::= altPs = x IF.<::= altPs infixl 2 <::= -- | -- Variant of '<:=>' that prioritises productions from left-to-right (or top-to-bottom). x <:= altPs = x IF.<:= altPs infixl 2 <:= -- | Try to apply a parser multiple times (0 or more) with shortest match -- applied to each occurrence of the parser. many :: (Show t, Ord t) => BNF t a -> BNF t [a] many = multiple_ (<<<**>) -- | Try to apply a parser multiple times (1 or more) with shortest match -- applied to each occurrence of the parser. many1 :: (Show t, Ord t) => BNF t a -> BNF t [a] many1 = multiple1_ (<<<**>) -- | Try to apply a parser multiple times (0 or more) with longest match -- applied to each occurrence of the parser. some :: (Show t, Ord t) => BNF t a -> BNF t [a] some = multiple_ (<**>>>) -- | Try to apply a parser multiple times (1 or more) with longest match -- applied to each occurrence of the parser. some1 :: (Show t, Ord t) => BNF t a -> BNF t [a] some1 = multiple1_ (<**>>>) -- | Try to apply a parser multiple times (0 or more). The results are returned in a list. -- In the case of ambiguity the largest list is returned. multiple :: (Show t, Ord t) => BNF t a -> BNF t [a] multiple = multiple_ (<**>) -- | Try to apply a parser multiple times (1 or more). The results are returned in a list. -- In the case of ambiguity the largest list is returned. multiple1 :: (Show t, Ord t) => BNF t a -> BNF t [a] multiple1 = multiple1_ (<**>) -- | Internal multiple_ disa p = let fresh = mkNt p "*" in fresh <::=> ((:) <$$> p) `disa` (multiple_ disa p) <||> satisfy [] -- | Internal multiple1_ disa p = let fresh = mkNt p "+" in fresh <::=> ((:) <$$> p) `disa` (multiple_ disa p) -- | Same as 'many' but with an additional separator. manySepBy :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a] manySepBy = sepBy many -- | Same as 'many1' but with an additional separator. manySepBy1 :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a] manySepBy1 = sepBy1 many -- | Same as 'some1' but with an additional separator. someSepBy :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a] someSepBy = sepBy some -- | Same as 'some1' but with an additional separator. someSepBy1 :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a] someSepBy1 = sepBy1 some -- | Same as 'multiple' but with an additional separator. multipleSepBy :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a] multipleSepBy = sepBy multiple -- | Same as 'multiple1' but with an additional separator. multipleSepBy1 :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t [a] multipleSepBy1 = sepBy1 multiple sepBy :: (Show t, Ord t) => (BNF t a -> BNF t [a]) -> BNF t a -> BNF t b -> BNF t [a] sepBy mult p c = mkRule $ satisfy [] <||> (:) <$$> p <**> mult (c **> p) sepBy1 :: (Show t, Ord t) => (BNF t a -> BNF t [a]) -> BNF t a -> BNF t b -> BNF t [a] sepBy1 mult p c = mkRule $ (:) <$$> p <**> mult (c **> p) -- | Like 'multipleSepBy1' but matching at least two occurrences of the -- first argument. The returned list is therefore always of at least -- length 2. At least one separator will be consumed. multipleSepBy2 p s = mkRule $ (:) <$$> p <** s <**> multipleSepBy1 p s -- | Like 'multipleSepBy2' but matching the minimum number of -- occurrences of the first argument as possible (at least 2). someSepBy2 p s = mkRule $ (:) <$$> p <** s <**> someSepBy1 p s -- | Like 'multipleSepBy2' but matching the maximum number of -- occurrences of the first argument as possible (at least 2). manySepBy2 p s = mkRule $ (:) <$$> p <** s <**> manySepBy1 p s -- | Derive either from the given symbol or the empty string. optional :: (Show t, Ord t) => BNF t a -> BNF t (Maybe a) optional p = fresh <:=> Just <$$> p <||> satisfy Nothing where fresh = mkNt p "?" -- | Version of 'optional' that prefers to derive from the given symbol, -- affects only nullable nonterminal symbols preferably :: (Show t, Ord t) => BNF t a -> BNF t (Maybe a) preferably p = fresh <:= Just <$$> p <||> satisfy Nothing where fresh = mkNt p "?" -- | Version of 'optional' that prefers to derive the empty string from -- the given symbol, affects only nullable nonterminal symbols reluctantly :: (Show t, Ord t) => BNF t a -> BNF t (Maybe a) reluctantly p = fresh <:= satisfy Nothing <||> Just <$$> p where fresh = mkNt p "?" optionalWithDef :: (Show t, Ord t) => BNF t a -> a -> BNF t a optionalWithDef p def = mkNt p "?" <:=> id <$$> p <||> satisfy def -- | Place a piece of BNF /within/ two other BNF fragments, ignoring their semantics. within :: (Show t, Ord t) => BNF t a -> BNF t b -> BNF t c -> BNF t b within l p r = IF.toSymb (l **> p <** r) -- | Place a piece of BNF between the characters '(' and ')'. parens p = within (keychar '(') p (keychar ')') -- | Place a piece of BNF between the characters '{' and '}'. braces p = within (keychar '{') p (keychar '}') -- | Place a piece of BNF between the characters '[' and ']'. brackets p = within (keychar '[') p (keychar ']') -- | Place a piece of BNF between the characters '<' and '>'. angles p = within (keychar '<') p (keychar '>') -- | Place a piece of BNF between two single quotes. quotes p = within (keychar '\'') p (keychar '\'') -- | Place a piece of BNF between two double quotes. dquotes p = within (keychar '"') p (keychar '"')