| Copyright | (C) 2026 - Eitan Chatav |
|---|---|
| License | BSD-style (see the file LICENSE) |
| Maintainer | Eitan Chatav <eitan.chatav@gmail.com> |
| Stability | provisional |
| Portability | non-portable |
| Safe Haskell | None |
| Language | Haskell2010 |
Control.Lens.Grammar
Description
See Chomsky, On Certain Formal Properties of Grammars
Synopsis
- type RegGrammar token a = forall (p :: Type -> Type -> Type). (Lexical token p, Alternator p) => p a a
- type Lexical token (p :: Type -> Type -> Type) = (forall x y. (x ~ (), y ~ ()) => TerminalSymbol token (p x y), forall x y. (x ~ token, y ~ token) => TokenAlgebra token (p x y))
- newtype RegString = RegString {}
- regstringG :: RegGrammar Char a -> RegString
- regexGrammar :: Grammar Char RegString
- type Grammar token a = forall (p :: Type -> Type -> Type). (Lexical token p, Alternator p, forall x. BackusNaurForm (p x x)) => p a a
- newtype RegBnf = RegBnf {}
- regbnfG :: Grammar Char a -> RegBnf
- regbnfGrammar :: Grammar Char RegBnf
- applicativeG :: (Alternative f, TokenAlgebra token (f token), TerminalSymbol token (f ()), forall x. BackusNaurForm (f x)) => Grammar token a -> f a
- transducerG :: Categorized token => Grammar token a -> Transducer token
- type CtxGrammar token a = forall (p :: Type -> Type -> Type). (Lexical token p, Alternator p, Filtrator p, MonadicTry p) => p a a
- printG :: (Cons string string token token, IsList string, Item string ~ token, Categorized token, Alternative m, Monad m, Filterable m) => CtxGrammar token a -> a -> m (string -> string)
- parseG :: (Cons string string token token, Snoc string string token token, IsList string, Item string ~ token, Categorized token, Alternative m, Monad m, Filterable m) => CtxGrammar token a -> string -> m (a, string)
- unparseG :: (Cons string string token token, Snoc string string token token, IsList string, Item string ~ token, Categorized token, Alternative m, Monad m, Filterable m) => CtxGrammar token a -> a -> string -> m string
- parsecG :: (Cons string string token token, Snoc string string token token, Item string ~ token, Categorized token) => CtxGrammar token a -> string -> ParsecState string a
- unparsecG :: (Cons string string token token, Snoc string string token token, Item string ~ token, Categorized token) => CtxGrammar token a -> a -> string -> ParsecState string a
- readG :: CtxGrammar Char a -> ReadP a
- monadG :: (MonadTry m, TokenAlgebra token (m token), TerminalSymbol token (m ())) => CtxGrammar token a -> m a
- putStringLn :: (IsList string, Item string ~ Char) => string -> IO ()
- module Control.Lens.Grammar.BackusNaur
- module Control.Lens.Grammar.Boole
- module Control.Lens.Grammar.Kleene
- module Control.Lens.Grammar.Machine
- module Control.Lens.Grammar.Symbol
- module Control.Lens.Grammar.Token
- module Control.Lens.PartialIso
- module Control.Monad.Fail.Try
- module Data.Profunctor.Distributor
- module Data.Profunctor.Filtrator
- module Data.Profunctor.Grammar
- module Data.Profunctor.Grammar.Parsector
- module Data.Profunctor.Monoidal
- module Data.Profunctor.Separator
- module Data.Traversable.Homogeneous
Regular grammar
type RegGrammar token a = forall (p :: Type -> Type -> Type). (Lexical token p, Alternator p) => p a a Source #
A regular grammar may be constructed using
Lexical and Alternator combinators.
Let's see an example using
semantic versioning syntax.
>>>import Numeric.Natural (Natural)>>>:{data SemVer = SemVer -- e.g., 2.1.5-rc.1+build.123 { major :: Natural -- e.g., 1 , minor :: Natural -- e.g., 2 , patch :: Natural -- e.g., 3 , preRelease :: [String] -- e.g., "alpha.1", "rc.2" , buildMetadata :: [String] -- e.g., "build.123", "20130313144700" } deriving (Eq, Ord, Show, Read) :}
We'd like to define an optic _SemVer,
corresponding to the constructor pattern SemVer.
You could generate it with the TemplateHaskell combinator,
makeNestedPrisms.
makeNestedPrisms ''SemVer
Unfortunately, we can't use TemplateHaskell to generate it in GHCi,
which is used to test this documenation.
Here is equivalent Haskell code instead.
Since SemVer has only one constructor,
_SemVer can be an Iso.
>>>:set -XRecordWildCards>>>import Control.Lens (Iso', iso)>>>:{_SemVer :: Iso' SemVer (Natural, (Natural, (Natural, ([String], [String])))) _SemVer = iso (\SemVer {..} -> (major, (minor, (patch, (preRelease, buildMetadata))))) (\(major, (minor, (patch, (preRelease, buildMetadata)))) -> SemVer {..}) :}
Now we can build a RegGrammar for SemVer using the "idiom" style of
Applicative parsing with a couple modifications.
>>>:{semverGrammar :: RegGrammar Char SemVer semverGrammar = _SemVer >? numberG >*< terminal "." >* numberG >*< terminal "." >* numberG >*< optionP _Empty (terminal "-" >* identifiersG) >*< optionP _Empty (terminal "+" >* identifiersG) where numberG = iso show read >~ someP (asIn @Char DecimalNumber) identifiersG = several1 (sepWith ".") (someP charG) charG = asIn LowercaseLetter <|> asIn UppercaseLetter <|> asIn DecimalNumber <|> token '-' :}
Instead of using the constructor SemVer with the Functor applicator <$>,
we use the optic _SemVer we defined and the Choice applicator >?;
although, we could have used the Profunctor applicator >~ instead,
because _SemVer is an Iso. A few Alternative
combinators like <|> work both Functorially and Profunctorially.
| Functorial | Profunctorial |
|---|---|
SemVer | _SemVer |
<$> | >? |
pure | pureP |
*> | >* |
<* | *< |
<*> | >*< |
empty | empty |
<|> | <|> |
choice | choice |
many | manyP |
some | someP |
optional | optionalP |
You can generate a RegString from a RegGrammar with regstringG.
>>>putStringLn (regstringG semverGrammar)\p{Nd}+(.\p{Nd}+(.\p{Nd}+((-((\p{Ll}|\p{Lu}|\p{Nd}|-)+(.(\p{Ll}|\p{Lu}|\p{Nd}|-)+)*))?(\+((\p{Ll}|\p{Lu}|\p{Nd}|-)+(.(\p{Ll}|\p{Lu}|\p{Nd}|-)+)*))?)))
You can also generate parsers and printers.
>>>[parsed | (parsed, "") <- parseG semverGrammar "2.1.5-rc.1+build.123"][SemVer {major = 2, minor = 1, patch = 5, preRelease = ["rc","1"], buildMetadata = ["build","123"]}]
Parsing unconses tokens left-to-right, from the beginning of a string.
Unparsing, on the other hand, snocs tokens left-to-right, to the end of a string.
>>>unparseG semverGrammar (SemVer 1 0 0 ["alpha"] []) "SemVer: " :: Maybe StringJust "SemVer: 1.0.0-alpha"
Printing, on the gripping hand, conses tokens right-to-left, to the beginning of a string.
>>>($ " is the SemVer.") <$> printG semverGrammar (SemVer 1 2 3 [] []) :: Maybe StringJust "1.2.3 is the SemVer."
Profunctorial combinators give us correct-by-construction invertible parsers.
New RegGrammar generators can be defined with new instances of Lexical Alternators.
type Lexical token (p :: Type -> Type -> Type) = (forall x y. (x ~ (), y ~ ()) => TerminalSymbol token (p x y), forall x y. (x ~ token, y ~ token) => TokenAlgebra token (p x y)) Source #
Lexical combinators include terminal symbols,
Tokenized combinators and tokenClasses.
RegStrings are an embedded domain specific language
of regular expression strings.
Since they are strings, they have a string-like interface.
>>>let rex = fromString "ab|c" :: RegString>>>putStringLn rexab|c>>>rex"ab|c"
RegStrings can be generated from RegGrammars with regstringG.
>>>regstringG (terminal "a" >* terminal "b" <|> terminal "c")"ab|c"
RegStrings are actually stored as an algebraic datatype, RegEx.
>>>runRegString rexRegExam (Alternate (Sequence (RegExam (OneOf (fromList "a"))) (RegExam (OneOf (fromList "b")))) (RegExam (OneOf (fromList "c"))))
RegStrings are similar to regular expression strings in many other
programming languages. We can use them to see if a word and pattern
are Matching.
>>>"ab" =~ rexTrue>>>"c" =~ rexTrue>>>"xyz" =~ rexFalse
Like RegGrammars, RegStrings can use all the Lexical combinators.
Unlike RegGrammars, instead of using Monoidal and Alternator combinators,
RegStrings use Monoid and KleeneStarAlgebra combinators.
>>>terminal "a" <> terminal "b" >|< terminal "c" :: RegString"ab|c">>>mempty :: RegString""
Since RegStrings are a KleeneStarAlgebra,
they support Kleene quantifiers.
>>>starK rex"(ab|c)*">>>plusK rex"(ab|c)+">>>optK rex"(ab|c)?"
Like other regular expression languages, RegStrings support
character classes.
>>>oneOf "abc" :: RegString"[abc]">>>notOneOf "abc" :: RegString"[^abc]"
The character classes are used for failure, matching no character or string, as well as the wildcard, matching any single character.
>>>zeroK :: RegString"[]">>>anyToken :: RegString"[^]"
Additional forms of character classes test for a character's GeneralCategory.
>>>asIn LowercaseLetter :: RegString"\\p{Ll}">>>notAsIn Control :: RegString"\\P{Cc}"
KleeneStarAlgebras support alternation >|<,
and the Tokenized combinators are all negatable.
However, we'd like to be able to take the
intersection of character classes as well.
RegStrings can combine characters' tokenClasses
using BooleanAlgebra combinators.
>>>tokenClass (notOneOf "abc" >&&< notOneOf "xyz") :: RegString"[^abcxyz]">>>tokenClass (oneOf "abcxyz" >&&< notOneOf "xyz") :: RegString"[abc]">>>tokenClass (notOneOf "#$%" >&&< notAsIn Control) :: RegString"[^#$%\\P{Cc}]">>>tokenClass (allB notAsIn [MathSymbol, Control]) :: RegString"\\P{Sm|Cc}">>>tokenClass (notB (oneOf "xyz")) :: RegString"[^xyz]"
Ill-formed RegStrings normalize to failure.
>>>fromString ")(" :: RegString"[]"
Constructors
| RegString | |
Fields | |
Instances
| IsString RegString Source # | |
Defined in Control.Lens.Grammar Methods fromString :: String -> RegString # | |
| Monoid RegString Source # | |
| Semigroup RegString Source # | |
| IsList RegString Source # | |
| Read RegString Source # | |
| Show RegString Source # | |
| KleeneStarAlgebra RegString Source # | |
| NonTerminalSymbol RegString Source # | |
Defined in Control.Lens.Grammar Methods nonTerminal :: String -> RegString Source # | |
| Eq RegString Source # | |
| Ord RegString Source # | |
| TokenAlgebra Char RegString Source # | |
Defined in Control.Lens.Grammar Methods tokenClass :: TokenClass Char -> RegString Source # | |
| Matching String RegString Source # | |
| TerminalSymbol Char RegString Source # | |
| Tokenized Char RegString Source # | |
Defined in Control.Lens.Grammar | |
| type Item RegString Source # | |
Defined in Control.Lens.Grammar | |
regstringG :: RegGrammar Char a -> RegString Source #
regstringG generates a RegString from a regular grammar.
Since context-free Grammars and CtxGrammars aren't necessarily regular,
the type system will prevent regstringG from being applied to them.
regexGrammar :: Grammar Char RegString Source #
regexGrammar is a context-free Grammar for RegStrings.
It can't be a RegGrammar, since RegStrings include parenthesization.
But balanced parentheses
are a context-free language.
>>>putStringLn (regbnfG regexGrammar){start} = \q{regex} {alternate} = \q{sequence}(\|\q{sequence})* {atom} = \\q\q{nonterminal}|\q{class}|\(\q{regex}\) {category} = Ll|Lu|Lt|Lm|Lo|Mn|Mc|Me|Nd|Nl|No|Pc|Pd|Ps|Pe|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp|Cc|Cf|Cs|Co|Cn {char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped} {char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC {char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control} {class} = \q{class-one-of}|\q{class-not-one-of} {class-category} = \\p\{\q{category}\}|\\P\{(\q{category}(\|\q{category})*)\} {class-not-one-of} = \q{class-category}|\[\^\q{char}*(\q{class-category}?\]) {class-one-of} = \q{char}|\[\q{char}*\] {expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom} {nonterminal} = \{\q{char}*\} {regex} = \q{alternate} {sequence} = \q{expression}*
Context-free grammar
type Grammar token a = forall (p :: Type -> Type -> Type). (Lexical token p, Alternator p, forall x. BackusNaurForm (p x x)) => p a a Source #
Context-free Grammars add two capabilities to RegGrammars,
coming from the BackusNaurForm interface
ruleabstraction,- and general recursion.
regexGrammar and regbnfGrammar are examples of context-free
Grammars. Regular expressions are a form of expression algebra.
Let's see a similar but simpler example,
the algebra of arithmetic expressions of natural numbers.
>>>import Numeric.Natural (Natural)>>>:{data Arith = Num Natural | Add Arith Arith | Mul Arith Arith deriving stock (Eq, Ord, Show, Read) :}
Here are Prisms for the constructor patterns.
>>>import Control.Lens (Prism', prism')>>>:{_Num :: Prism' Arith Natural _Num = prism' Num (\case Num n -> Just n; _ -> Nothing) _Add, _Mul :: Prism' Arith (Arith, Arith) _Add = prism' (uncurry Add) (\case Add x y -> Just (x,y); _ -> Nothing) _Mul = prism' (uncurry Mul) (\case Mul x y -> Just (x,y); _ -> Nothing) :}
Now we can build a Grammar for Arith
by combining "idiom" style with named rules,
and tying the recursive loop
(caused by parenthesization)
with ruleRec.
>>>:{arithGrammar :: Grammar Char Arith arithGrammar = ruleRec "arith" sumG where sumG arith = rule "sum" $ chain1 Left _Add (sepWith "+") (prodG arith) prodG arith = rule "product" $ chain1 Left _Mul (sepWith "*") (factorG arith) factorG arith = rule "factor" $ numberG <|> terminal "(" >* arith *< terminal ")" numberG = rule "number" $ _Num . iso show read >? someP (asIn @Char DecimalNumber) :}
We can generate grammar strings, printers and parsers from arithGrammar.
>>>putStringLn (regbnfG arithGrammar){start} = \q{arith} {arith} = \q{sum} {factor} = \q{number}|\(\q{arith}\) {number} = \p{Nd}+ {product} = \q{factor}(\*\q{factor})* {sum} = \q{product}(\+\q{product})*>>>[x | (x,"") <- parseG arithGrammar "1+2*3+4"][Add (Add (Num 1) (Mul (Num 2) (Num 3))) (Num 4)]>>>unparseG arithGrammar (Add (Num 1) (Mul (Num 2) (Num 3))) "" :: Maybe StringJust "1+2*3">>>do pr <- printG arithGrammar (Num 69); pure (pr "") :: Maybe StringJust "69"
If all rules are non-recursive, then a Grammar
can be rewritten as a RegGrammar.
Since Haskell permits general recursion, and RegGrammars are
embedded in Haskell, you can define context-free grammars with them.
But it's recommended to use Grammars for rule abstraction
and generator support for ruleRec.
RegBnfs are an embedded domain specific language
of Backus-Naur forms extended by regular expression strings.
A RegBnf consists of a distinguished RegString "start" rule,
and a set of named RegString rules.
>>>putStringLn (rule "baz" (terminal "foo" >|< terminal "bar") :: RegBnf){start} = \q{baz} {baz} = foo|bar
Like RegStrings they have a string-like interface.
>>>let bnf = fromString "{start} = foo|bar" :: RegBnf>>>putStringLn bnf{start} = foo|bar>>>bnf"{start} = foo|bar">>>:type toList bnftoList bnf :: [Char]
RegBnfs can be generated from context-free Grammars with regbnfG.
>>>:type regbnfG regbnfGrammarregbnfG regbnfGrammar :: RegBnf
Like RegStrings, RegBnfs can be constructed using
Lexical, Monoid and KleeneStarAlgebra combinators.
But they also support BackusNaurForm rules and ruleRecs.
>>>putStringLn (rule "baz" (bnf >|< terminal "baz")){start} = \q{baz} {baz} = foo|bar|baz>>>putStringLn (ruleRec "∞-loop" (\x -> x) :: RegBnf){start} = \q{∞-loop} {∞-loop} = \q{∞-loop}
Instances
| IsString RegBnf Source # | |
Defined in Control.Lens.Grammar Methods fromString :: String -> RegBnf # | |
| Monoid RegBnf Source # | |
| Semigroup RegBnf Source # | |
| IsList RegBnf Source # | |
| Read RegBnf Source # | |
| Show RegBnf Source # | |
| BackusNaurForm RegBnf Source # | |
| KleeneStarAlgebra RegBnf Source # | |
| NonTerminalSymbol RegBnf Source # | |
Defined in Control.Lens.Grammar Methods nonTerminal :: String -> RegBnf Source # | |
| Eq RegBnf Source # | |
| Ord RegBnf Source # | |
| TokenAlgebra Char RegBnf Source # | |
Defined in Control.Lens.Grammar Methods tokenClass :: TokenClass Char -> RegBnf Source # | |
| Matching String RegBnf Source # | |
| TerminalSymbol Char RegBnf Source # | |
| Tokenized Char RegBnf Source # | |
| type Item RegBnf Source # | |
Defined in Control.Lens.Grammar | |
regbnfG :: Grammar Char a -> RegBnf Source #
regbnfG generates a RegBnf from a context-free Grammar.
Since CtxGrammars aren't context-free,
the type system will prevent regbnfG from being applied to a CtxGrammar.
It can apply to a RegGrammar.
regbnfGrammar :: Grammar Char RegBnf Source #
regbnfGrammar is a context-free Grammar for RegBnfs.
That means that it can generate a self-hosted definition.
>>>putStringLn (regbnfG regbnfGrammar){start} = \q{regbnf} {alternate} = \q{sequence}(\|\q{sequence})* {atom} = \\q\q{nonterminal}|\q{class}|\(\q{regex}\) {category} = Ll|Lu|Lt|Lm|Lo|Mn|Mc|Me|Nd|Nl|No|Pc|Pd|Ps|Pe|Pi|Pf|Po|Sm|Sc|Sk|So|Zs|Zl|Zp|Cc|Cf|Cs|Co|Cn {char} = [^\(\)\*\+\?\[\\\]\^\{\|\}\P{Cc}]|\\\q{char-escaped} {char-control} = NUL|SOH|STX|ETX|EOT|ENQ|ACK|BEL|BS|HT|LF|VT|FF|CR|SO|SI|DLE|DC1|DC2|DC3|DC4|NAK|SYN|ETB|CAN|EM|SUB|ESC|FS|GS|RS|US|DEL|PAD|HOP|BPH|NBH|IND|NEL|SSA|ESA|HTS|HTJ|VTS|PLD|PLU|RI|SS2|SS3|DCS|PU1|PU2|STS|CCH|MW|SPA|EPA|SOS|SGCI|SCI|CSI|ST|OSC|PM|APC {char-escaped} = [\(\)\*\+\?\[\\\]\^\{\|\}]|\q{char-control} {class} = \q{class-one-of}|\q{class-not-one-of} {class-category} = \\p\{\q{category}\}|\\P\{(\q{category}(\|\q{category})*)\} {class-not-one-of} = \q{class-category}|\[\^\q{char}*(\q{class-category}?\]) {class-one-of} = \q{char}|\[\q{char}*\] {expression} = \q{atom}\?|\q{atom}\*|\q{atom}\+|\q{atom} {nonterminal} = \{\q{char}*\} {regbnf} = \{start\} = \q{regex}(\LF\q{nonterminal}( = )\q{regex})* {regex} = \q{alternate} {sequence} = \q{expression}*
Arguments
| :: (Alternative f, TokenAlgebra token (f token), TerminalSymbol token (f ()), forall x. BackusNaurForm (f x)) | |
| => Grammar token a | context-free grammar |
| -> f a |
Generate any Applicative parser backend
from a Grammar with applicativeG.
It works the same way as monadG,
for parsers without Monad instances.
That permits backends to use algorithms
that can only parse context-free Grammars.
transducerG :: Categorized token => Grammar token a -> Transducer token Source #
Compile a Grammar into a Transducer.
>>>let regexMachine = transducerG @Char regexGrammar
A transducer is a form of finite state machine,
usable as an intermediary for further generators like
=~, expectNext, languageSample, parseForest & unreachableRules.
>>>import Test.QuickCheck>>>let regexLang = languageSample @Char regexMachine>>>words100 <- generate (take 100 <$> regexLang)>>>quickCheck (property (all (=~ regexMachine) words100))+++ OK, passed 1 test.>>>import Control.Monad.State>>>import System.Random>>>let gen = mkStdGen 69>>>evalState (take 15 <$> regexLang) gen["","|","\776269","()","[]","\\[","||","|\249908","\770923*","\1008821+","\318904?","\845807|","\477898\1026934","()*","()+"]
>>>import Data.Tree (drawForest)
>>> let (forest, _) = parseForest regexMachine "xy|z" in putStr (drawForest (map (fmap show) forest))
("regex",0,4,"xy|z")
|
`- ("alternate",0,4,"xy|z")
|
+- ("sequence",0,2,"xy")
| |
| +- ("expression",0,1,"x")
| | |
| | `- ("atom",0,1,"x")
| | |
| | `- ("class",0,1,"x")
| | |
| | `- ("class-one-of",0,1,"x")
| | |
| | `- ("char",0,1,"x")
| |
| `- ("expression",1,2,"y")
| |
| `- ("atom",1,2,"y")
| |
| `- ("class",1,2,"y")
| |
| `- ("class-one-of",1,2,"y")
| |
| `- ("char",1,2,"y")
|
`- ("sequence",3,4,"z")
|
`- ("expression",3,4,"z")
|
`- ("atom",3,4,"z")
|
`- ("class",3,4,"z")
|
`- ("class-one-of",3,4,"z")
|
`- ("char",3,4,"z")
Context-sensitive grammar
type CtxGrammar token a = forall (p :: Type -> Type -> Type). (Lexical token p, Alternator p, Filtrator p, MonadicTry p) => p a a Source #
For context-sensitivity,
the Monadic interface is used by importing Data.Profunctor.Monadic
qualified and using a "bonding" notation which mixes
"idiom" style with qualified do-notation.
Let's use length-encoded vectors of numbers as an example.
>>>import Numeric.Natural (Natural)>>>import Control.Lens.Iso (Iso', iso)>>>:set -XRecordWildCards>>>:{data LenVec = LenVec {length :: Natural, vector :: [Natural]} deriving (Eq, Ord, Show, Read) _LenVec :: Iso' LenVec (Natural, [Natural]) _LenVec = iso (\LenVec {..} -> (length, vector)) (\(length, vector) -> LenVec {..}) :}
>>>:set -XQualifiedDo>>>import qualified Data.Profunctor.Monadic as P>>>:{lenvecGrammar :: CtxGrammar Char LenVec lenvecGrammar = _LenVec >? P.do let numberG = iso show read >~ someP (asIn @Char DecimalNumber) vectorG n = intercalateP n (sepWith ",") numberG len <- numberG -- bonds to _LenVec terminal ";" -- doesn't bond vectorG (fromIntegral len) -- bonds to _LenVec :}
The qualified do-notation changes the signature of
P.>>=,
so that we must apply the constructor pattern _LenVec
to the do-block with the >? applicator.
Any scoped bound action, var <- action,
gets "bonded" to the constructor pattern.
Any unbound actions, except for the last action in the do-block,
does not get bonded to the pattern.
The last action does get bonded to the pattern.
Any unscoped bound action, _ <- action,
also gets bonded to the pattern,
but being unscoped means it isn't added to the context.
If all bound actions are unscoped,
and filtration & failure handling aren't used,
then a CtxGrammar can be rewritten as a Grammar since it is context-free.
We can't generate a RegBnf from a CtxGrammar since the rules
aren't static, but dynamic and contextual.
We can generate parsers and printers as expected.
>>>[vec | (vec, "") <- parseG lenvecGrammar "3;1,2,3"] :: [LenVec][LenVec {length = 3, vector = [1,2,3]}]>>>[vec | (vec, "") <- parseG lenvecGrammar "0;1,2,3"] :: [LenVec][]>>>[pr "" | pr <- printG lenvecGrammar (LenVec 2 [6,7])] :: [String]["2;6,7"]>>>[pr "" | pr <- printG lenvecGrammar (LenVec 200 [100])] :: [String][]
In addition to context-sensitivity via Monadic combinators,
CtxGrammars add unrestricted filtration to Grammars.
The satisfy combinator is an unrestricted token filter.
And the satisfied pattern is used together with the Choice &
Cochoice applicator >?< for unrestricted filtration.
>>>:{palindromeG :: CtxGrammar Char String palindromeG = rule "palindrome" $ satisfied (\wrd -> reverse wrd == wrd) >?< manyP (anyToken @Char) :}
>>>[pal | word <- ["racecar", "word"], (pal, "") <- parseG palindromeG word]["racecar"]
Since CtxGrammars are embedded in Haskell,
permitting computable predicates,
and Filtrator has a default definition for Monadic Alternators,
the context-sensitivity of CtxGrammar implies
unrestricted filtration of grammars by computable predicates,
which can recognize the larger class of recursively enumerable languages.
Finally, CtxGrammars support failure reporting and backtracking.
This has no effect on printG, parseG or unparseG;
but it effects parsecG and unparsecG.
For context, an LL grammar can be (un)parsed by an LL parser.
An LL parser (un)parses from left to right,
and constucts leftmost derivations.
An LL(k) parser can look k tokens ahead.
Parsor is an LL(∞) parser.
Parsector is an LL(1) parser.
The backtracking try combinator
restores full lookahead to Parsector.
Since both Parsor & Parsector are LL parsers they
diverge if the CtxGrammar they're run on is left-recursive.
>>>parsecG (rule "foo" (fail "bar") <|> fail "baz") "abc"ParsecState {parsecLooked = False, parsecOffset = 0, parsecStream = "abc", parsecFailure = ParsecFailure {parsecExpect = TokenClass (OneOf (fromList "")), parsecLabels = [Node {rootLabel = "foo", subForest = [Node {rootLabel = "bar", subForest = []}]},Node {rootLabel = "baz", subForest = []}]}, parsecResult = Nothing}
>>>parsecG (manyP (token 'a') >*< asIn @Char DecimalNumber) "aaab"ParsecState {parsecLooked = True, parsecOffset = 3, parsecStream = "b", parsecFailure = ParsecFailure {parsecExpect = TokenClass (Alternate (TokenClass (OneOf (fromList "a"))) (TokenClass (NotOneOf (fromList "") (AndAsIn DecimalNumber)))), parsecLabels = []}, parsecResult = Nothing}
>>>unparsecG (tokens "abc") "abx" ""ParsecState {parsecLooked = True, parsecOffset = 2, parsecStream = "ab", parsecFailure = ParsecFailure {parsecExpect = TokenClass (OneOf (fromList "c")), parsecLabels = []}, parsecResult = Nothing}
Arguments
| :: (Cons string string token token, IsList string, Item string ~ token, Categorized token, Alternative m, Monad m, Filterable m) | |
| => CtxGrammar token a | |
| -> a | syntax |
| -> m (string -> string) |
printG generates a printer from a CtxGrammar.
Since both RegGrammars and context-free Grammars are CtxGrammars,
the type system will allow printG to be applied to them.
Running the printer on a syntax value returns a function
that conses tokens at the beginning of an input string,
from right to left.
Arguments
| :: (Cons string string token token, Snoc string string token token, IsList string, Item string ~ token, Categorized token, Alternative m, Monad m, Filterable m) | |
| => CtxGrammar token a | |
| -> string | input |
| -> m (a, string) |
parseG generates a parser from a LL(∞) CtxGrammar.
Since both RegGrammars and context-free Grammars are CtxGrammars,
the type system will allow parseG to be applied to them.
Running the parser on an input string value unconses
tokens from the beginning of an input string from left to right,
returning a syntax value and the remaining output string.
Arguments
| :: (Cons string string token token, Snoc string string token token, IsList string, Item string ~ token, Categorized token, Alternative m, Monad m, Filterable m) | |
| => CtxGrammar token a | |
| -> a | syntax |
| -> string | input |
| -> m string |
unparseG generates a printer from a LL(∞) CtxGrammar.
Since both RegGrammars and context-free Grammars are CtxGrammars,
the type system will allow unparseG to be applied to them.
Running the printer on a syntax value and an input string
snocs tokens at the end of the string, from left to right,
returning the output string.
Arguments
| :: (Cons string string token token, Snoc string string token token, Item string ~ token, Categorized token) | |
| => CtxGrammar token a | |
| -> string | input |
| -> ParsecState string a |
parsecG generates a parser from a LL(1) CtxGrammar,
with try for restoring full LL(∞) lookahead.
Since both RegGrammars and context-free Grammars are CtxGrammars,
the type system will allow parsecG to be applied to them.
Running the parser on an input string value unconses
tokens from the beginning of an input string from left to right,
returning parsecResult as Nothing on failure or Just
an output syntax value, with parse failure stored in parsecFailure,
and a remaining output parsecStream.
Arguments
| :: (Cons string string token token, Snoc string string token token, Item string ~ token, Categorized token) | |
| => CtxGrammar token a | |
| -> a | syntax |
| -> string | input |
| -> ParsecState string a |
unparsecG generates a printer from a LL(1) CtxGrammar,
with try for restoring full LL(∞) lookahead.
Since both RegGrammars and context-free Grammars are CtxGrammars,
the type system will allow unparsecG to be applied to them.
Running the printer on a syntax value and an input string
snocs tokens at the end of the string, from left to right,
returning parsecResult as Nothing on failure or Just
the input syntax value, with print success stored in parsecStream.
readG :: CtxGrammar Char a -> ReadP a Source #
Generate a ReadP backend from a CtxGrammar Char.
Arguments
| :: (MonadTry m, TokenAlgebra token (m token), TerminalSymbol token (m ())) | |
| => CtxGrammar token a | context-sensitive grammar |
| -> m a |
Generate any parser Monad backend
from a CtxGrammar with monadG.
Let's see how to do this without orphan instances,
using the Megaparsec library.
import qualified Text.Megaparsec as M
import qualified Text.Megaparsec.Char as M
import Control.Lens.Grammar
newtype WrapMega a = WrapMega {unwrapMega :: M.Parsec String String a}
deriving newtype
( Functor, Applicative, Alternative
, Monad, MonadPlus, MonadFail
)
instance TerminalSymbol Char (WrapMega ()) where
terminal str = WrapMega (M.chunk str *> pure ())
instance TokenAlgebra Char (WrapMega Char) where
tokenClass exam = WrapMega $ M.label (show exam) (M.satisfy (tokenClass exam))
instance Tokenized Char (WrapMega Char) where
anyToken = WrapMega M.anySingle
token = WrapMega . M.single
oneOf = WrapMega . M.oneOf
notOneOf = WrapMega . M.noneOf
asIn cat = WrapMega $ M.label ("in category " ++ show cat) (M.satisfy (asIn cat))
notAsIn cat = WrapMega $ M.label ("not in category " ++ show cat) (M.satisfy (notAsIn cat))
instance BackusNaurForm (WrapMega a) where
rule lbl (WrapMega p) = WrapMega (M.label lbl p)
ruleRec lbl = rule lbl . fix
instance Filterable WrapMega where
catMaybes m = m >>= maybe (fail "unrestricted filtration") pure
instance MonadTry WrapMega where
try (WrapMega p) = WrapMega (M.try p)
megaparsecG
:: CtxGrammar Char a
-> M.Parsec String String a
megaparsecG gram = unwrapMega (monadG gram)
Utility
putStringLn :: (IsList string, Item string ~ Char) => string -> IO () Source #
putStringLn is a utility that generalizes putStrLn
to string-like interfaces such as RegString and RegBnf.
Re-exports
module Control.Lens.Grammar.Boole
module Control.Lens.Grammar.Kleene
module Control.Lens.Grammar.Machine
module Control.Lens.Grammar.Symbol
module Control.Lens.Grammar.Token
module Control.Lens.PartialIso
module Control.Monad.Fail.Try
module Data.Profunctor.Distributor
module Data.Profunctor.Filtrator
module Data.Profunctor.Grammar
module Data.Profunctor.Monoidal
module Data.Profunctor.Separator
module Data.Traversable.Homogeneous