{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

-- | A 'Markup' parser and printer of strict 'ByteString's focused on optimising performance. 'Markup' is a representation of data such as HTML, SVG or XML but the parsing is not always at standards.
module MarkupParse
  ( -- * Usage

    --
    -- $usage

    -- * Markup
    Markup (..),
    Standard (..),
    markup,
    markup_,
    RenderStyle (..),
    markdown,
    markdown_,
    normalize,
    normContent,
    wellFormed,
    isWellFormed,

    -- * Warnings
    MarkupWarning (..),
    Warn,
    warnError,
    warnEither,
    warnMaybe,

    -- * Element
    Element,
    element,
    element_,
    emptyElem,
    elementc,
    content,
    contentRaw,

    -- * Token components
    NameTag,
    selfClosers,
    doctypeHtml,
    doctypeXml,
    AttrName,
    AttrValue,
    Attr (..),
    addAttrs,
    attrsP,
    nameP,

    -- * Tokens
    OpenTagType (..),
    Token (..),
    tokenize,
    tokenize_,
    tokenP,
    detokenize,
    gather,
    gather_,
    degather,
    degather_,

    -- * XML specific Parsers
    xmlVersionInfoP,
    xmlEncodingDeclP,
    xmlStandaloneP,
    xmlVersionNumP,
    xmlEncNameP,
    xmlYesNoP,

    -- * bytestring support
    utf8ToStr,
    strToUtf8,
    escapeChar,
    escape,

    -- * Tree support
    Tree (..),
  )
where

import Control.Category ((>>>))
import Control.DeepSeq
import Control.Monad
import Data.Bifunctor
import Data.Bool
import Data.ByteString (ByteString)
import Data.ByteString.Char8 qualified as B
import Data.Char hiding (isDigit)
import Data.Foldable
import Data.Function
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Maybe
import Data.String.Interpolate
import Data.These
import Data.Tree
import Data.TreeDiff
import FlatParse.Basic hiding (Result, cut, take)
import GHC.Generics
import MarkupParse.FlatParse
import Prelude hiding (replicate)

-- $setup
-- >>> :set -XTemplateHaskell
-- >>> :set -XQuasiQuotes
-- >>> :set -XOverloadedStrings
-- >>> import MarkupParse
-- >>> import MarkupParse.Patch
-- >>> import MarkupParse.FlatParse
-- >>> import FlatParse.Basic
-- >>> import Data.String.Interpolate
-- >>> import Data.ByteString.Char8 qualified as B
-- >>> import Data.Tree

-- $usage
--
-- > import MarkupParse
-- > import Data.ByteString qualified as B
-- >
-- > bs <- B.readFile "other/line.svg"
-- > m = markup_ bs
--
-- @'markdown_' . 'markup_'@ is an isomorphic round trip from 'Markup' to 'ByteString' to 'Markup':
--
-- - This is subject to the 'Markup' being 'wellFormed'.
--
-- - The round-trip @'markup_' . 'markdown_'@ is not isomorphic as parsing forgets whitespace within tags, comments and declarations.
--
-- - The underscores represent versions of main functions that throw an exception on warnings encountered along the way.
--
-- At a lower level, a round trip pipeline might look something like:
--
-- > tokenize Html >=>
--
-- - 'tokenize' converts a 'ByteString' to a 'Token' list.
--
-- > gather Html >=>
--
-- - 'gather' takes the tokens and gathers them into 'Tree's of 'Token's which is what 'Markup' is.
--
-- > (normalize >>> pure) >=>
--
-- - 'normalize' concatenates content, and normalizes attributes,
--
-- > degather Html >=>
--
-- - 'degather' turns the markup tree back into a token list. Finally,
--
-- > fmap (detokenize Html) >>> pure
--
-- - 'detokenize' turns a token back into a bytestring.
--
-- Along the way, the kleisi fishies and compose forward usage accumulates any warnings via the 'These' monad instance, which is wrapped into a type synonym named 'Warn'.

-- | From a parsing pov, Html & Xml (& Svg) are close enough that they share a lot of parsing logic, so that parsing and printing just need some tweaking.
--
-- The xml parsing logic is based on the XML productions found in https://www.w3.org/TR/xml/
--
-- The html parsing was based on a reading of <https://hackage.haskell.org/package/html-parse html-parse>, but ignores the various '\x00' to '\xfffd' & eof directives that form part of the html standards.
data Standard = Html | Xml deriving (Standard -> Standard -> Bool
(Standard -> Standard -> Bool)
-> (Standard -> Standard -> Bool) -> Eq Standard
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Standard -> Standard -> Bool
== :: Standard -> Standard -> Bool
$c/= :: Standard -> Standard -> Bool
/= :: Standard -> Standard -> Bool
Eq, Int -> Standard -> ShowS
[Standard] -> ShowS
Standard -> String
(Int -> Standard -> ShowS)
-> (Standard -> String) -> ([Standard] -> ShowS) -> Show Standard
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Standard -> ShowS
showsPrec :: Int -> Standard -> ShowS
$cshow :: Standard -> String
show :: Standard -> String
$cshowList :: [Standard] -> ShowS
showList :: [Standard] -> ShowS
Show, Eq Standard
Eq Standard =>
(Standard -> Standard -> Ordering)
-> (Standard -> Standard -> Bool)
-> (Standard -> Standard -> Bool)
-> (Standard -> Standard -> Bool)
-> (Standard -> Standard -> Bool)
-> (Standard -> Standard -> Standard)
-> (Standard -> Standard -> Standard)
-> Ord Standard
Standard -> Standard -> Bool
Standard -> Standard -> Ordering
Standard -> Standard -> Standard
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Standard -> Standard -> Ordering
compare :: Standard -> Standard -> Ordering
$c< :: Standard -> Standard -> Bool
< :: Standard -> Standard -> Bool
$c<= :: Standard -> Standard -> Bool
<= :: Standard -> Standard -> Bool
$c> :: Standard -> Standard -> Bool
> :: Standard -> Standard -> Bool
$c>= :: Standard -> Standard -> Bool
>= :: Standard -> Standard -> Bool
$cmax :: Standard -> Standard -> Standard
max :: Standard -> Standard -> Standard
$cmin :: Standard -> Standard -> Standard
min :: Standard -> Standard -> Standard
Ord, (forall x. Standard -> Rep Standard x)
-> (forall x. Rep Standard x -> Standard) -> Generic Standard
forall x. Rep Standard x -> Standard
forall x. Standard -> Rep Standard x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. Standard -> Rep Standard x
from :: forall x. Standard -> Rep Standard x
$cto :: forall x. Rep Standard x -> Standard
to :: forall x. Rep Standard x -> Standard
Generic, Standard -> ()
(Standard -> ()) -> NFData Standard
forall a. (a -> ()) -> NFData a
$crnf :: Standard -> ()
rnf :: Standard -> ()
NFData, [Standard] -> Expr
Standard -> Expr
(Standard -> Expr) -> ([Standard] -> Expr) -> ToExpr Standard
forall a. (a -> Expr) -> ([a] -> Expr) -> ToExpr a
$ctoExpr :: Standard -> Expr
toExpr :: Standard -> Expr
$clistToExpr :: [Standard] -> Expr
listToExpr :: [Standard] -> Expr
ToExpr)

-- | A list of 'Element's or 'Tree' 'Token's
--
-- >>> markup Html "<foo class=\"bar\">baz</foo>"
-- That (Markup {elements = [Node {rootLabel = OpenTag StartTag "foo" [Attr {attrName = "class", attrValue = "bar"}], subForest = [Node {rootLabel = Content "baz", subForest = []}]}]})
newtype Markup = Markup {Markup -> [Element]
elements :: [Element]}
  deriving stock (Int -> Markup -> ShowS
[Markup] -> ShowS
Markup -> String
(Int -> Markup -> ShowS)
-> (Markup -> String) -> ([Markup] -> ShowS) -> Show Markup
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Markup -> ShowS
showsPrec :: Int -> Markup -> ShowS
$cshow :: Markup -> String
show :: Markup -> String
$cshowList :: [Markup] -> ShowS
showList :: [Markup] -> ShowS
Show, Markup -> Markup -> Bool
(Markup -> Markup -> Bool)
-> (Markup -> Markup -> Bool) -> Eq Markup
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Markup -> Markup -> Bool
== :: Markup -> Markup -> Bool
$c/= :: Markup -> Markup -> Bool
/= :: Markup -> Markup -> Bool
Eq, Eq Markup
Eq Markup =>
(Markup -> Markup -> Ordering)
-> (Markup -> Markup -> Bool)
-> (Markup -> Markup -> Bool)
-> (Markup -> Markup -> Bool)
-> (Markup -> Markup -> Bool)
-> (Markup -> Markup -> Markup)
-> (Markup -> Markup -> Markup)
-> Ord Markup
Markup -> Markup -> Bool
Markup -> Markup -> Ordering
Markup -> Markup -> Markup
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Markup -> Markup -> Ordering
compare :: Markup -> Markup -> Ordering
$c< :: Markup -> Markup -> Bool
< :: Markup -> Markup -> Bool
$c<= :: Markup -> Markup -> Bool
<= :: Markup -> Markup -> Bool
$c> :: Markup -> Markup -> Bool
> :: Markup -> Markup -> Bool
$c>= :: Markup -> Markup -> Bool
>= :: Markup -> Markup -> Bool
$cmax :: Markup -> Markup -> Markup
max :: Markup -> Markup -> Markup
$cmin :: Markup -> Markup -> Markup
min :: Markup -> Markup -> Markup
Ord, (forall x. Markup -> Rep Markup x)
-> (forall x. Rep Markup x -> Markup) -> Generic Markup
forall x. Rep Markup x -> Markup
forall x. Markup -> Rep Markup x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. Markup -> Rep Markup x
from :: forall x. Markup -> Rep Markup x
$cto :: forall x. Rep Markup x -> Markup
to :: forall x. Rep Markup x -> Markup
Generic)
  deriving anyclass (Markup -> ()
(Markup -> ()) -> NFData Markup
forall a. (a -> ()) -> NFData a
$crnf :: Markup -> ()
rnf :: Markup -> ()
NFData, [Markup] -> Expr
Markup -> Expr
(Markup -> Expr) -> ([Markup] -> Expr) -> ToExpr Markup
forall a. (a -> Expr) -> ([a] -> Expr) -> ToExpr a
$ctoExpr :: Markup -> Expr
toExpr :: Markup -> Expr
$clistToExpr :: [Markup] -> Expr
listToExpr :: [Markup] -> Expr
ToExpr)
  deriving newtype (NonEmpty Markup -> Markup
Markup -> Markup -> Markup
(Markup -> Markup -> Markup)
-> (NonEmpty Markup -> Markup)
-> (forall b. Integral b => b -> Markup -> Markup)
-> Semigroup Markup
forall b. Integral b => b -> Markup -> Markup
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
$c<> :: Markup -> Markup -> Markup
<> :: Markup -> Markup -> Markup
$csconcat :: NonEmpty Markup -> Markup
sconcat :: NonEmpty Markup -> Markup
$cstimes :: forall b. Integral b => b -> Markup -> Markup
stimes :: forall b. Integral b => b -> Markup -> Markup
Semigroup, Semigroup Markup
Markup
Semigroup Markup =>
Markup
-> (Markup -> Markup -> Markup)
-> ([Markup] -> Markup)
-> Monoid Markup
[Markup] -> Markup
Markup -> Markup -> Markup
forall a.
Semigroup a =>
a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
$cmempty :: Markup
mempty :: Markup
$cmappend :: Markup -> Markup -> Markup
mappend :: Markup -> Markup -> Markup
$cmconcat :: [Markup] -> Markup
mconcat :: [Markup] -> Markup
Monoid)

-- | markup-parse generally tries to continue on parse errors, and return what has/can still be parsed, together with any warnings.
data MarkupWarning
  = -- | A tag ending with "/>" that is not an element of 'selfClosers' (Html only).
    BadEmptyElemTag
  | -- | A tag ending with "/>" that has children. Cannot happen in the parsing phase.
    SelfCloserWithChildren
  | -- | Only a 'StartTag' can have child tokens.
    LeafWithChildren
  | -- | A CloseTag with a different name to the currently open StartTag.
    TagMismatch NameTag NameTag
  | -- | An EndTag with no corresponding StartTag.
    UnmatchedEndTag
  | -- | An StartTag with no corresponding EndTag.
    UnclosedTag
  | -- | An EndTag should never appear in 'Markup'
    EndTagInTree
  | -- | Empty Content, Comment, Decl or Doctype
    EmptyContent
  | -- | Badly formed declaration
    BadDecl
  | MarkupParser ParserWarning
  deriving (MarkupWarning -> MarkupWarning -> Bool
(MarkupWarning -> MarkupWarning -> Bool)
-> (MarkupWarning -> MarkupWarning -> Bool) -> Eq MarkupWarning
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: MarkupWarning -> MarkupWarning -> Bool
== :: MarkupWarning -> MarkupWarning -> Bool
$c/= :: MarkupWarning -> MarkupWarning -> Bool
/= :: MarkupWarning -> MarkupWarning -> Bool
Eq, Int -> MarkupWarning -> ShowS
[MarkupWarning] -> ShowS
MarkupWarning -> String
(Int -> MarkupWarning -> ShowS)
-> (MarkupWarning -> String)
-> ([MarkupWarning] -> ShowS)
-> Show MarkupWarning
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> MarkupWarning -> ShowS
showsPrec :: Int -> MarkupWarning -> ShowS
$cshow :: MarkupWarning -> String
show :: MarkupWarning -> String
$cshowList :: [MarkupWarning] -> ShowS
showList :: [MarkupWarning] -> ShowS
Show, Eq MarkupWarning
Eq MarkupWarning =>
(MarkupWarning -> MarkupWarning -> Ordering)
-> (MarkupWarning -> MarkupWarning -> Bool)
-> (MarkupWarning -> MarkupWarning -> Bool)
-> (MarkupWarning -> MarkupWarning -> Bool)
-> (MarkupWarning -> MarkupWarning -> Bool)
-> (MarkupWarning -> MarkupWarning -> MarkupWarning)
-> (MarkupWarning -> MarkupWarning -> MarkupWarning)
-> Ord MarkupWarning
MarkupWarning -> MarkupWarning -> Bool
MarkupWarning -> MarkupWarning -> Ordering
MarkupWarning -> MarkupWarning -> MarkupWarning
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: MarkupWarning -> MarkupWarning -> Ordering
compare :: MarkupWarning -> MarkupWarning -> Ordering
$c< :: MarkupWarning -> MarkupWarning -> Bool
< :: MarkupWarning -> MarkupWarning -> Bool
$c<= :: MarkupWarning -> MarkupWarning -> Bool
<= :: MarkupWarning -> MarkupWarning -> Bool
$c> :: MarkupWarning -> MarkupWarning -> Bool
> :: MarkupWarning -> MarkupWarning -> Bool
$c>= :: MarkupWarning -> MarkupWarning -> Bool
>= :: MarkupWarning -> MarkupWarning -> Bool
$cmax :: MarkupWarning -> MarkupWarning -> MarkupWarning
max :: MarkupWarning -> MarkupWarning -> MarkupWarning
$cmin :: MarkupWarning -> MarkupWarning -> MarkupWarning
min :: MarkupWarning -> MarkupWarning -> MarkupWarning
Ord, (forall x. MarkupWarning -> Rep MarkupWarning x)
-> (forall x. Rep MarkupWarning x -> MarkupWarning)
-> Generic MarkupWarning
forall x. Rep MarkupWarning x -> MarkupWarning
forall x. MarkupWarning -> Rep MarkupWarning x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. MarkupWarning -> Rep MarkupWarning x
from :: forall x. MarkupWarning -> Rep MarkupWarning x
$cto :: forall x. Rep MarkupWarning x -> MarkupWarning
to :: forall x. Rep MarkupWarning x -> MarkupWarning
Generic, MarkupWarning -> ()
(MarkupWarning -> ()) -> NFData MarkupWarning
forall a. (a -> ()) -> NFData a
$crnf :: MarkupWarning -> ()
rnf :: MarkupWarning -> ()
NFData)

showWarnings :: [MarkupWarning] -> String
showWarnings :: [MarkupWarning] -> String
showWarnings = [MarkupWarning] -> [MarkupWarning]
forall a. Eq a => [a] -> [a]
List.nub ([MarkupWarning] -> [MarkupWarning])
-> ([MarkupWarning] -> String) -> [MarkupWarning] -> String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> (MarkupWarning -> String) -> [MarkupWarning] -> [String]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap MarkupWarning -> String
forall a. Show a => a -> String
show ([MarkupWarning] -> [String])
-> ([String] -> String) -> [MarkupWarning] -> String
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> [String] -> String
unlines

-- | A type synonym for the common returning type of many functions. A common computation pipeline is to take advantage of the 'These' Monad instance eg
--
-- > markup s bs = bs & (tokenize s >=> gather s) & second (Markup s)
type Warn a = These [MarkupWarning] a

-- | Convert any warnings to an 'error'
--
-- >>> warnError $ (tokenize Html) "<foo"
-- *** Exception: MarkupParser (ParserLeftover "<foo")
-- ...
warnError :: Warn a -> a
warnError :: forall a. Warn a -> a
warnError = ([MarkupWarning] -> a)
-> (a -> a)
-> ([MarkupWarning] -> a -> a)
-> These [MarkupWarning] a
-> a
forall a c b.
(a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these ([MarkupWarning] -> String
showWarnings ([MarkupWarning] -> String)
-> (String -> a) -> [MarkupWarning] -> a
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> a
forall a. HasCallStack => String -> a
error) a -> a
forall a. a -> a
id (\[MarkupWarning]
xs a
a -> a -> a -> Bool -> a
forall a. a -> a -> Bool -> a
bool (String -> a
forall a. HasCallStack => String -> a
error ([MarkupWarning] -> String
showWarnings [MarkupWarning]
xs)) a
a ([MarkupWarning]
xs [MarkupWarning] -> [MarkupWarning] -> Bool
forall a. Eq a => a -> a -> Bool
== []))

-- | Returns Left on any warnings
--
-- >>> warnEither $ (tokenize Html) "<foo><baz"
-- Left [MarkupParser (ParserLeftover "<baz")]
warnEither :: Warn a -> Either [MarkupWarning] a
warnEither :: forall a. Warn a -> Either [MarkupWarning] a
warnEither = ([MarkupWarning] -> Either [MarkupWarning] a)
-> (a -> Either [MarkupWarning] a)
-> ([MarkupWarning] -> a -> Either [MarkupWarning] a)
-> These [MarkupWarning] a
-> Either [MarkupWarning] a
forall a c b.
(a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these [MarkupWarning] -> Either [MarkupWarning] a
forall a b. a -> Either a b
Left a -> Either [MarkupWarning] a
forall a b. b -> Either a b
Right (\[MarkupWarning]
xs a
a -> Either [MarkupWarning] a
-> Either [MarkupWarning] a -> Bool -> Either [MarkupWarning] a
forall a. a -> a -> Bool -> a
bool ([MarkupWarning] -> Either [MarkupWarning] a
forall a b. a -> Either a b
Left [MarkupWarning]
xs) (a -> Either [MarkupWarning] a
forall a b. b -> Either a b
Right a
a) ([MarkupWarning]
xs [MarkupWarning] -> [MarkupWarning] -> Bool
forall a. Eq a => a -> a -> Bool
== []))

-- | Returns results, if any, ignoring warnings.
--
-- >>> warnMaybe $ (tokenize Html) "<foo><baz"
-- Just [OpenTag StartTag "foo" []]
warnMaybe :: Warn a -> Maybe a
warnMaybe :: forall a. Warn a -> Maybe a
warnMaybe = ([MarkupWarning] -> Maybe a)
-> (a -> Maybe a)
-> ([MarkupWarning] -> a -> Maybe a)
-> These [MarkupWarning] a
-> Maybe a
forall a c b.
(a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these (Maybe a -> [MarkupWarning] -> Maybe a
forall a b. a -> b -> a
const Maybe a
forall a. Maybe a
Nothing) a -> Maybe a
forall a. a -> Maybe a
Just (\[MarkupWarning]
_ a
a -> a -> Maybe a
forall a. a -> Maybe a
Just a
a)

-- | Convert bytestrings to 'Markup'
--
-- >>> markup Html "<foo><br></foo><baz"
-- These [MarkupParser (ParserLeftover "<baz")] (Markup {elements = [Node {rootLabel = OpenTag StartTag "foo" [], subForest = [Node {rootLabel = OpenTag StartTag "br" [], subForest = []}]}]})
markup :: Standard -> ByteString -> Warn Markup
markup :: Standard -> ByteString -> Warn Markup
markup Standard
s ByteString
bs = ByteString
bs ByteString -> (ByteString -> Warn Markup) -> Warn Markup
forall a b. a -> (a -> b) -> b
& (Standard -> ByteString -> Warn [Token]
tokenize Standard
s (ByteString -> Warn [Token])
-> ([Token] -> Warn Markup) -> ByteString -> Warn Markup
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> Standard -> [Token] -> Warn Markup
gather Standard
s)

-- | 'markup' but errors on warnings.
markup_ :: Standard -> ByteString -> Markup
markup_ :: Standard -> ByteString -> Markup
markup_ Standard
s ByteString
bs = Standard -> ByteString -> Warn Markup
markup Standard
s ByteString
bs Warn Markup -> (Warn Markup -> Markup) -> Markup
forall a b. a -> (a -> b) -> b
& Warn Markup -> Markup
forall a. Warn a -> a
warnError

-- | Concatenate sequential content and normalize attributes; unwording class values and removing duplicate attributes (taking last).
--
-- >>> B.putStr $ warnError $ markdown Compact Xml $ normalize (markup_ Xml [i|<foo class="a" class="b" bar="first" bar="last"/>|])
-- <foo bar="last" class="a b"/>
normalize :: Markup -> Markup
normalize :: Markup -> Markup
normalize Markup
m = Markup -> Markup
normContent (Markup -> Markup) -> Markup -> Markup
forall a b. (a -> b) -> a -> b
$ [Element] -> Markup
Markup ([Element] -> Markup) -> [Element] -> Markup
forall a b. (a -> b) -> a -> b
$ (Element -> Element) -> [Element] -> [Element]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Token -> Token) -> Element -> Element
forall a b. (a -> b) -> Tree a -> Tree b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Token
normTokenAttrs) (Markup -> [Element]
elements Markup
m)

-- | Are the trees in the markup well-formed?
isWellFormed :: Standard -> Markup -> Bool
isWellFormed :: Standard -> Markup -> Bool
isWellFormed Standard
s = ([MarkupWarning] -> [MarkupWarning] -> Bool
forall a. Eq a => a -> a -> Bool
== []) ([MarkupWarning] -> Bool)
-> (Markup -> [MarkupWarning]) -> Markup -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Standard -> Markup -> [MarkupWarning]
wellFormed Standard
s

-- | Check for well-formedness and return warnings encountered.
--
-- >>> wellFormed Html $ Markup [Node (Comment "") [], Node (EndTag "foo") [], Node (OpenTag EmptyElemTag "foo" []) [Node (Content "bar") []], Node (OpenTag EmptyElemTag "foo" []) []]
-- [EmptyContent,EndTagInTree,LeafWithChildren,BadEmptyElemTag]
wellFormed :: Standard -> Markup -> [MarkupWarning]
wellFormed :: Standard -> Markup -> [MarkupWarning]
wellFormed Standard
s (Markup [Element]
trees) = [MarkupWarning] -> [MarkupWarning]
forall a. Eq a => [a] -> [a]
List.nub ([MarkupWarning] -> [MarkupWarning])
-> [MarkupWarning] -> [MarkupWarning]
forall a b. (a -> b) -> a -> b
$ [[MarkupWarning]] -> [MarkupWarning]
forall a. Monoid a => [a] -> a
mconcat ((Token -> [[MarkupWarning]] -> [MarkupWarning])
-> Element -> [MarkupWarning]
forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree Token -> [[MarkupWarning]] -> [MarkupWarning]
checkNode (Element -> [MarkupWarning]) -> [Element] -> [[MarkupWarning]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Element]
trees)
  where
    checkNode :: Token -> [[MarkupWarning]] -> [MarkupWarning]
checkNode (OpenTag OpenTagType
StartTag ByteString
_ [Attr]
_) [[MarkupWarning]]
xs = [[MarkupWarning]] -> [MarkupWarning]
forall a. Monoid a => [a] -> a
mconcat [[MarkupWarning]]
xs
    checkNode (OpenTag OpenTagType
EmptyElemTag ByteString
n [Attr]
_) [] =
      [MarkupWarning] -> [MarkupWarning] -> Bool -> [MarkupWarning]
forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
BadEmptyElemTag] (Bool -> Bool
not (ByteString
n ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
selfClosers) Bool -> Bool -> Bool
&& Standard
s Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Html)
    checkNode (EndTag ByteString
_) [] = [MarkupWarning
EndTagInTree]
    checkNode (Content ByteString
bs) [] = [MarkupWarning] -> [MarkupWarning] -> Bool -> [MarkupWarning]
forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
EmptyContent] (ByteString
bs ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"")
    checkNode (Comment ByteString
bs) [] = [MarkupWarning] -> [MarkupWarning] -> Bool -> [MarkupWarning]
forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
EmptyContent] (ByteString
bs ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"")
    checkNode (Decl ByteString
bs [Attr]
as) []
      | ByteString
bs ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"" = [MarkupWarning
EmptyContent]
      | Standard
s Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Html Bool -> Bool -> Bool
&& [Attr]
as [Attr] -> [Attr] -> Bool
forall a. Eq a => a -> a -> Bool
/= [] = [MarkupWarning
BadDecl]
      | Standard
s Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Xml Bool -> Bool -> Bool
&& (ByteString
"version" ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (Attr -> ByteString
attrName (Attr -> ByteString) -> [Attr] -> [ByteString]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Attr]
as)) Bool -> Bool -> Bool
&& (ByteString
"encoding" ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (Attr -> ByteString
attrName (Attr -> ByteString) -> [Attr] -> [ByteString]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Attr]
as)) =
          [MarkupWarning
BadDecl]
      | Bool
otherwise = []
    checkNode (Doctype ByteString
bs) [] = [MarkupWarning] -> [MarkupWarning] -> Bool -> [MarkupWarning]
forall a. a -> a -> Bool -> a
bool [] [MarkupWarning
EmptyContent] (ByteString
bs ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
"")
    checkNode Token
_ [[MarkupWarning]]
_ = [MarkupWarning
LeafWithChildren]

-- | Name of token
type NameTag = ByteString

-- | Whether an opening tag is a start tag or an empty element tag.
data OpenTagType = StartTag | EmptyElemTag deriving (Int -> OpenTagType -> ShowS
[OpenTagType] -> ShowS
OpenTagType -> String
(Int -> OpenTagType -> ShowS)
-> (OpenTagType -> String)
-> ([OpenTagType] -> ShowS)
-> Show OpenTagType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> OpenTagType -> ShowS
showsPrec :: Int -> OpenTagType -> ShowS
$cshow :: OpenTagType -> String
show :: OpenTagType -> String
$cshowList :: [OpenTagType] -> ShowS
showList :: [OpenTagType] -> ShowS
Show, Eq OpenTagType
Eq OpenTagType =>
(OpenTagType -> OpenTagType -> Ordering)
-> (OpenTagType -> OpenTagType -> Bool)
-> (OpenTagType -> OpenTagType -> Bool)
-> (OpenTagType -> OpenTagType -> Bool)
-> (OpenTagType -> OpenTagType -> Bool)
-> (OpenTagType -> OpenTagType -> OpenTagType)
-> (OpenTagType -> OpenTagType -> OpenTagType)
-> Ord OpenTagType
OpenTagType -> OpenTagType -> Bool
OpenTagType -> OpenTagType -> Ordering
OpenTagType -> OpenTagType -> OpenTagType
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: OpenTagType -> OpenTagType -> Ordering
compare :: OpenTagType -> OpenTagType -> Ordering
$c< :: OpenTagType -> OpenTagType -> Bool
< :: OpenTagType -> OpenTagType -> Bool
$c<= :: OpenTagType -> OpenTagType -> Bool
<= :: OpenTagType -> OpenTagType -> Bool
$c> :: OpenTagType -> OpenTagType -> Bool
> :: OpenTagType -> OpenTagType -> Bool
$c>= :: OpenTagType -> OpenTagType -> Bool
>= :: OpenTagType -> OpenTagType -> Bool
$cmax :: OpenTagType -> OpenTagType -> OpenTagType
max :: OpenTagType -> OpenTagType -> OpenTagType
$cmin :: OpenTagType -> OpenTagType -> OpenTagType
min :: OpenTagType -> OpenTagType -> OpenTagType
Ord, OpenTagType -> OpenTagType -> Bool
(OpenTagType -> OpenTagType -> Bool)
-> (OpenTagType -> OpenTagType -> Bool) -> Eq OpenTagType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: OpenTagType -> OpenTagType -> Bool
== :: OpenTagType -> OpenTagType -> Bool
$c/= :: OpenTagType -> OpenTagType -> Bool
/= :: OpenTagType -> OpenTagType -> Bool
Eq, (forall x. OpenTagType -> Rep OpenTagType x)
-> (forall x. Rep OpenTagType x -> OpenTagType)
-> Generic OpenTagType
forall x. Rep OpenTagType x -> OpenTagType
forall x. OpenTagType -> Rep OpenTagType x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. OpenTagType -> Rep OpenTagType x
from :: forall x. OpenTagType -> Rep OpenTagType x
$cto :: forall x. Rep OpenTagType x -> OpenTagType
to :: forall x. Rep OpenTagType x -> OpenTagType
Generic, OpenTagType -> ()
(OpenTagType -> ()) -> NFData OpenTagType
forall a. (a -> ()) -> NFData a
$crnf :: OpenTagType -> ()
rnf :: OpenTagType -> ()
NFData, [OpenTagType] -> Expr
OpenTagType -> Expr
(OpenTagType -> Expr)
-> ([OpenTagType] -> Expr) -> ToExpr OpenTagType
forall a. (a -> Expr) -> ([a] -> Expr) -> ToExpr a
$ctoExpr :: OpenTagType -> Expr
toExpr :: OpenTagType -> Expr
$clistToExpr :: [OpenTagType] -> Expr
listToExpr :: [OpenTagType] -> Expr
ToExpr)

-- | A Markup token. The term is borrowed from <https://www.w3.org/html/wg/spec/tokenization.html#tokenization HTML> standards but is used across 'Html' and 'Xml' in this library.
--
-- Note that the 'Token' type is used in two slightly different contexts:
--
-- - As an intermediary representation of markup between 'ByteString' and 'Markup'.
--
-- - As the primitives of 'Markup' 'Element's
--
-- Specifically, an 'EndTag' will occur in a list of tokens, but not as a primitive in 'Markup'. It may turn out to be better to have two different types for these two uses and future iterations of this library may head in this direction.
--
-- >>> runParser_ (many (tokenP Html)) [i|<foo>content</foo>|]
-- [OpenTag StartTag "foo" [],Content "content",EndTag "foo"]
--
-- >>> runParser_ (tokenP Xml) [i|<foo/>|]
-- OpenTag EmptyElemTag "foo" []
--
-- >>> runParser_ (tokenP Html) "<!-- Comment -->"
-- Comment " Comment "
--
-- >>> runParser_ (tokenP Xml) [i|<?xml version="1.0" encoding="UTF-8"?>|]
-- Decl "xml" [Attr {attrName = "version", attrValue = " version=\"1.0\""},Attr {attrName = "encoding", attrValue = "UTF-8"}]
--
-- >>> runParser_ (tokenP Html) "<!DOCTYPE html>"
-- Doctype "DOCTYPE html"
--
-- >>> runParser_ (tokenP Xml) "<!DOCTYPE foo [ declarations ]>"
-- Doctype "DOCTYPE foo [ declarations ]"
--
-- >>> runParser (tokenP Html) [i|<foo a="a" b="b" c=c check>|]
-- OK (OpenTag StartTag "foo" [Attr {attrName = "a", attrValue = "a"},Attr {attrName = "b", attrValue = "b"},Attr {attrName = "c", attrValue = "c"},Attr {attrName = "check", attrValue = ""}]) ""
--
-- >>> runParser (tokenP Xml) [i|<foo a="a" b="b" c=c check>|]
-- Fail
data Token
  = -- | A tag. https://developer.mozilla.org/en-US/docs/Glossary/Tag
    OpenTag !OpenTagType !NameTag ![Attr]
  | -- | A closing tag.
    EndTag !NameTag
  | -- | The content between tags.
    Content !ByteString
  | -- | Contents of a comment.
    Comment !ByteString
  | -- | Contents of a declaration
    Decl !ByteString ![Attr]
  | -- | Contents of a doctype declaration.
    Doctype !ByteString
  deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Token -> ShowS
showsPrec :: Int -> Token -> ShowS
$cshow :: Token -> String
show :: Token -> String
$cshowList :: [Token] -> ShowS
showList :: [Token] -> ShowS
Show, Eq Token
Eq Token =>
(Token -> Token -> Ordering)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Token)
-> (Token -> Token -> Token)
-> Ord Token
Token -> Token -> Bool
Token -> Token -> Ordering
Token -> Token -> Token
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Token -> Token -> Ordering
compare :: Token -> Token -> Ordering
$c< :: Token -> Token -> Bool
< :: Token -> Token -> Bool
$c<= :: Token -> Token -> Bool
<= :: Token -> Token -> Bool
$c> :: Token -> Token -> Bool
> :: Token -> Token -> Bool
$c>= :: Token -> Token -> Bool
>= :: Token -> Token -> Bool
$cmax :: Token -> Token -> Token
max :: Token -> Token -> Token
$cmin :: Token -> Token -> Token
min :: Token -> Token -> Token
Ord, Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
/= :: Token -> Token -> Bool
Eq, (forall x. Token -> Rep Token x)
-> (forall x. Rep Token x -> Token) -> Generic Token
forall x. Rep Token x -> Token
forall x. Token -> Rep Token x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. Token -> Rep Token x
from :: forall x. Token -> Rep Token x
$cto :: forall x. Rep Token x -> Token
to :: forall x. Rep Token x -> Token
Generic, Token -> ()
(Token -> ()) -> NFData Token
forall a. (a -> ()) -> NFData a
$crnf :: Token -> ()
rnf :: Token -> ()
NFData, [Token] -> Expr
Token -> Expr
(Token -> Expr) -> ([Token] -> Expr) -> ToExpr Token
forall a. (a -> Expr) -> ([a] -> Expr) -> ToExpr a
$ctoExpr :: Token -> Expr
toExpr :: Token -> Expr
$clistToExpr :: [Token] -> Expr
listToExpr :: [Token] -> Expr
ToExpr)

-- | Escape a single character.
escapeChar :: Char -> ByteString
escapeChar :: Char -> ByteString
escapeChar Char
'<' = ByteString
"&lt;"
escapeChar Char
'>' = ByteString
"&gt;"
escapeChar Char
'&' = ByteString
"&amp;"
escapeChar Char
'\'' = ByteString
"&apos;"
escapeChar Char
'"' = ByteString
"&quot;"
escapeChar Char
x = Char -> ByteString
B.singleton Char
x

-- | Escape the following predefined character entity references:
--
-- @
-- escapeChar \'<\' = "&lt;"
-- escapeChar \'>\' = "&gt;"
-- escapeChar \'&\' = "&amp;"
-- escapeChar '\'' = "&apos;"
-- escapeChar '"' = "&quot;"
-- @
--
-- No attempt is made to meet the <https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references HTML Standards>
--
-- >>> escape [i|<foo class="a" bar='b'>|]
-- "&lt;foo class=&quot;a&quot; bar=&apos;b&apos;&gt;"
escape :: ByteString -> ByteString
escape :: ByteString -> ByteString
escape ByteString
bs = (Char -> ByteString) -> ByteString -> ByteString
B.concatMap Char -> ByteString
escapeChar ByteString
bs

-- | Append attributes to an existing Token attribute list. Returns Nothing for tokens that do not have attributes.
addAttrs :: [Attr] -> Token -> Maybe Token
addAttrs :: [Attr] -> Token -> Maybe Token
addAttrs [Attr]
as (OpenTag OpenTagType
t ByteString
n [Attr]
as') = Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ OpenTagType -> ByteString -> [Attr] -> Token
OpenTag OpenTagType
t ByteString
n ([Attr]
as [Attr] -> [Attr] -> [Attr]
forall a. Semigroup a => a -> a -> a
<> [Attr]
as')
addAttrs [Attr]
_ Token
_ = Maybe Token
forall a. Maybe a
Nothing

-- | Standard Html Doctype
--
-- >>> markdown_ Compact Html doctypeHtml
-- "<!DOCTYPE html>"
doctypeHtml :: Markup
doctypeHtml :: Markup
doctypeHtml = [Element] -> Markup
Markup ([Element] -> Markup) -> [Element] -> Markup
forall a b. (a -> b) -> a -> b
$ Element -> [Element]
forall a. a -> [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Element -> [Element]) -> Element -> [Element]
forall a b. (a -> b) -> a -> b
$ Token -> Element
forall a. a -> Tree a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteString -> Token
Doctype ByteString
"DOCTYPE html")

-- | Standard Xml Doctype
--
-- >>> markdown_ Compact Xml doctypeXml
-- "<?xml version=\"1.0\" encoding=\"utf-8\"?><!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n    \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">"
doctypeXml :: Markup
doctypeXml :: Markup
doctypeXml =
  [Element] -> Markup
Markup
    [ Token -> Element
forall a. a -> Tree a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Token -> Element) -> Token -> Element
forall a b. (a -> b) -> a -> b
$ ByteString -> [Attr] -> Token
Decl ByteString
"xml" [ByteString -> ByteString -> Attr
Attr ByteString
"version" ByteString
"1.0", ByteString -> ByteString -> Attr
Attr ByteString
"encoding" ByteString
"utf-8"],
      Token -> Element
forall a. a -> Tree a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Token -> Element) -> Token -> Element
forall a b. (a -> b) -> a -> b
$ ByteString -> Token
Doctype ByteString
"DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n    \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\""
    ]

-- | A flatparse 'Token' parser.
--
-- >>> runParser (tokenP Html) "<foo>content</foo>"
-- OK (OpenTag StartTag "foo" []) "content</foo>"
tokenP :: Standard -> Parser e Token
tokenP :: forall e. Standard -> Parser e Token
tokenP Standard
Html = Parser e Token
forall e. Parser e Token
tokenHtmlP
tokenP Standard
Xml = Parser e Token
forall e. Parser e Token
tokenXmlP

-- | Parse a bytestring into tokens
--
-- >>> tokenize Html [i|<foo>content</foo>|]
-- That [OpenTag StartTag "foo" [],Content "content",EndTag "foo"]
tokenize :: Standard -> ByteString -> Warn [Token]
tokenize :: Standard -> ByteString -> Warn [Token]
tokenize Standard
s ByteString
bs = (ParserWarning -> [MarkupWarning])
-> These ParserWarning [Token] -> Warn [Token]
forall a b c. (a -> b) -> These a c -> These b c
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ((MarkupWarning -> [MarkupWarning] -> [MarkupWarning]
forall a. a -> [a] -> [a]
: []) (MarkupWarning -> [MarkupWarning])
-> (ParserWarning -> MarkupWarning)
-> ParserWarning
-> [MarkupWarning]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParserWarning -> MarkupWarning
MarkupParser) (These ParserWarning [Token] -> Warn [Token])
-> These ParserWarning [Token] -> Warn [Token]
forall a b. (a -> b) -> a -> b
$ Parser ByteString [Token]
-> ByteString -> These ParserWarning [Token]
forall a.
Parser ByteString a -> ByteString -> These ParserWarning a
runParserWarn (ParserT PureMode ByteString Token -> Parser ByteString [Token]
forall a.
ParserT PureMode ByteString a -> ParserT PureMode ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Standard -> ParserT PureMode ByteString Token
forall e. Standard -> Parser e Token
tokenP Standard
s)) ByteString
bs

-- | tokenize but errors on warnings.
tokenize_ :: Standard -> ByteString -> [Token]
tokenize_ :: Standard -> ByteString -> [Token]
tokenize_ Standard
s ByteString
bs = Standard -> ByteString -> Warn [Token]
tokenize Standard
s ByteString
bs Warn [Token] -> (Warn [Token] -> [Token]) -> [Token]
forall a b. a -> (a -> b) -> b
& Warn [Token] -> [Token]
forall a. Warn a -> a
warnError

-- | Html tags that self-close
selfClosers :: [NameTag]
selfClosers :: [ByteString]
selfClosers =
  [ ByteString
"area",
    ByteString
"base",
    ByteString
"br",
    ByteString
"col",
    ByteString
"embed",
    ByteString
"hr",
    ByteString
"img",
    ByteString
"input",
    ByteString
"link",
    ByteString
"meta",
    ByteString
"param",
    ByteString
"source",
    ByteString
"track",
    ByteString
"wbr"
  ]

-- | Most functions return a 'Markup' rather than an 'Element' because it is often more ergonomic to use the free monoid (aka a list) in preference to returning a 'Maybe' 'Element' (say).
type Element = Tree Token

-- | Create 'Markup' from a name tag and attributes that wraps some other markup.
--
-- >>> element "div" [] (element_ "br" [])
-- Markup {elements = [Node {rootLabel = OpenTag StartTag "div" [], subForest = [Node {rootLabel = OpenTag StartTag "br" [], subForest = []}]}]}
element :: NameTag -> [Attr] -> Markup -> Markup
element :: ByteString -> [Attr] -> Markup -> Markup
element ByteString
n [Attr]
as (Markup [Element]
xs) = [Element] -> Markup
Markup [Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node (OpenTagType -> ByteString -> [Attr] -> Token
OpenTag OpenTagType
StartTag ByteString
n [Attr]
as) [Element]
xs]

-- | Create 'Markup' from a name tag and attributes that doesn't wrap some other markup. The 'OpenTagType' used is 'StartTag'. Use 'emptyElem' if you want to create 'EmptyElemTag' based markup.
--
-- >>> (element_ "br" [])
-- Markup {elements = [Node {rootLabel = OpenTag StartTag "br" [], subForest = []}]}
element_ :: NameTag -> [Attr] -> Markup
element_ :: ByteString -> [Attr] -> Markup
element_ ByteString
n [Attr]
as = [Element] -> Markup
Markup [Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node (OpenTagType -> ByteString -> [Attr] -> Token
OpenTag OpenTagType
StartTag ByteString
n [Attr]
as) []]

-- | Create 'Markup' from a name tag and attributes using 'EmptyElemTag', that doesn't wrap some other markup. No checks are made on whether this creates well-formed markup.
--
-- >>> emptyElem "br" []
-- Markup {elements = [Node {rootLabel = OpenTag EmptyElemTag "br" [], subForest = []}]}
emptyElem :: NameTag -> [Attr] -> Markup
emptyElem :: ByteString -> [Attr] -> Markup
emptyElem ByteString
n [Attr]
as = [Element] -> Markup
Markup [Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node (OpenTagType -> ByteString -> [Attr] -> Token
OpenTag OpenTagType
EmptyElemTag ByteString
n [Attr]
as) []]

-- | Create 'Markup' from a name tag and attributes that wraps some 'Content'. No escaping is performed.
--
-- >>> elementc "div" [] "content"
-- Markup {elements = [Node {rootLabel = OpenTag StartTag "div" [], subForest = [Node {rootLabel = Content "content", subForest = []}]}]}
elementc :: NameTag -> [Attr] -> ByteString -> Markup
elementc :: ByteString -> [Attr] -> ByteString -> Markup
elementc ByteString
n [Attr]
as ByteString
bs = ByteString -> [Attr] -> Markup -> Markup
element ByteString
n [Attr]
as (ByteString -> Markup
contentRaw ByteString
bs)

-- | Create 'Markup' 'Content' from a bytestring, escaping the usual characters.
--
-- >>> content "<content>"
-- Markup {elements = [Node {rootLabel = Content "&lt;content&gt;", subForest = []}]}
content :: ByteString -> Markup
content :: ByteString -> Markup
content ByteString
bs = [Element] -> Markup
Markup [Token -> Element
forall a. a -> Tree a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Token -> Element) -> Token -> Element
forall a b. (a -> b) -> a -> b
$ ByteString -> Token
Content (ByteString -> ByteString
escape ByteString
bs)]

-- | Create a Markup element from a bytestring, not escaping the usual characters.
--
-- >>> contentRaw "<content>"
-- Markup {elements = [Node {rootLabel = Content "<content>", subForest = []}]}
--
-- >>> markup_ Html $ markdown_ Compact Html $ contentRaw "<content>"
-- Markup {elements = *** Exception: UnclosedTag
-- ...
contentRaw :: ByteString -> Markup
contentRaw :: ByteString -> Markup
contentRaw ByteString
bs = [Element] -> Markup
Markup [Token -> Element
forall a. a -> Tree a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Token -> Element) -> Token -> Element
forall a b. (a -> b) -> a -> b
$ ByteString -> Token
Content ByteString
bs]

-- | Name of an attribute.
type AttrName = ByteString

-- | Value of an attribute. "" is equivalent to true with respect to boolean attributes.
type AttrValue = ByteString

-- | An attribute of a tag
--
-- In parsing, boolean attributes, which are not required to have a value in HTML,
-- will be set a value of "", which is ok. But this will then be rendered.
--
-- >>> detokenize Html <$> tokenize_ Html [i|<input checked>|]
-- ["<input checked=\"\">"]
data Attr = Attr {Attr -> ByteString
attrName :: !AttrName, Attr -> ByteString
attrValue :: !AttrValue}
  deriving ((forall x. Attr -> Rep Attr x)
-> (forall x. Rep Attr x -> Attr) -> Generic Attr
forall x. Rep Attr x -> Attr
forall x. Attr -> Rep Attr x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. Attr -> Rep Attr x
from :: forall x. Attr -> Rep Attr x
$cto :: forall x. Rep Attr x -> Attr
to :: forall x. Rep Attr x -> Attr
Generic, Int -> Attr -> ShowS
[Attr] -> ShowS
Attr -> String
(Int -> Attr -> ShowS)
-> (Attr -> String) -> ([Attr] -> ShowS) -> Show Attr
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Attr -> ShowS
showsPrec :: Int -> Attr -> ShowS
$cshow :: Attr -> String
show :: Attr -> String
$cshowList :: [Attr] -> ShowS
showList :: [Attr] -> ShowS
Show, Attr -> Attr -> Bool
(Attr -> Attr -> Bool) -> (Attr -> Attr -> Bool) -> Eq Attr
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Attr -> Attr -> Bool
== :: Attr -> Attr -> Bool
$c/= :: Attr -> Attr -> Bool
/= :: Attr -> Attr -> Bool
Eq, Eq Attr
Eq Attr =>
(Attr -> Attr -> Ordering)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Attr)
-> (Attr -> Attr -> Attr)
-> Ord Attr
Attr -> Attr -> Bool
Attr -> Attr -> Ordering
Attr -> Attr -> Attr
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Attr -> Attr -> Ordering
compare :: Attr -> Attr -> Ordering
$c< :: Attr -> Attr -> Bool
< :: Attr -> Attr -> Bool
$c<= :: Attr -> Attr -> Bool
<= :: Attr -> Attr -> Bool
$c> :: Attr -> Attr -> Bool
> :: Attr -> Attr -> Bool
$c>= :: Attr -> Attr -> Bool
>= :: Attr -> Attr -> Bool
$cmax :: Attr -> Attr -> Attr
max :: Attr -> Attr -> Attr
$cmin :: Attr -> Attr -> Attr
min :: Attr -> Attr -> Attr
Ord)

instance NFData Attr

instance ToExpr Attr

normTokenAttrs :: Token -> Token
normTokenAttrs :: Token -> Token
normTokenAttrs (OpenTag OpenTagType
t ByteString
n [Attr]
as) = OpenTagType -> ByteString -> [Attr] -> Token
OpenTag OpenTagType
t ByteString
n ([Attr] -> [Attr]
normAttrs [Attr]
as)
normTokenAttrs Token
x = Token
x

-- | normalize an attribution list, removing duplicate AttrNames, and space concatenating class values.
normAttrs :: [Attr] -> [Attr]
normAttrs :: [Attr] -> [Attr]
normAttrs [Attr]
as =
  (ByteString -> ByteString -> Attr)
-> (ByteString, ByteString) -> Attr
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry ByteString -> ByteString -> Attr
Attr
    ((ByteString, ByteString) -> Attr)
-> [(ByteString, ByteString)] -> [Attr]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Map ByteString ByteString -> [(ByteString, ByteString)]
forall k a. Map k a -> [(k, a)]
Map.toList (Map ByteString ByteString -> [(ByteString, ByteString)])
-> Map ByteString ByteString -> [(ByteString, ByteString)]
forall a b. (a -> b) -> a -> b
$
            (Map ByteString ByteString -> Attr -> Map ByteString ByteString)
-> Map ByteString ByteString -> [Attr] -> Map ByteString ByteString
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl'
              ( \Map ByteString ByteString
s (Attr ByteString
n ByteString
v) ->
                  (ByteString -> ByteString -> ByteString -> ByteString)
-> ByteString
-> ByteString
-> Map ByteString ByteString
-> Map ByteString ByteString
forall k a.
Ord k =>
(k -> a -> a -> a) -> k -> a -> Map k a -> Map k a
Map.insertWithKey
                    ( \ByteString
k ByteString
new ByteString
old ->
                        case ByteString
k of
                          ByteString
"class" -> ByteString
old ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
" " ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
new
                          ByteString
_ -> ByteString
new
                    )
                    ByteString
n
                    ByteString
v
                    Map ByteString ByteString
s
              )
              Map ByteString ByteString
forall k a. Map k a
Map.empty
              [Attr]
as
        )

-- | render attributes
renderAttrs :: [Attr] -> ByteString
renderAttrs :: [Attr] -> ByteString
renderAttrs [] = ByteString
forall a. Monoid a => a
mempty
renderAttrs [Attr]
xs = Char -> ByteString
B.singleton Char
' ' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ([ByteString] -> ByteString
B.unwords ([ByteString] -> ByteString)
-> ([Attr] -> [ByteString]) -> [Attr] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Attr -> ByteString) -> [Attr] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Attr -> ByteString
renderAttr ([Attr] -> ByteString) -> [Attr] -> ByteString
forall a b. (a -> b) -> a -> b
$ [Attr]
xs)

-- | render an attribute
--
-- Does not attempt to escape double quotes.
renderAttr :: Attr -> ByteString
renderAttr :: Attr -> ByteString
renderAttr (Attr ByteString
k ByteString
v) = [i|#{k}="#{v}"|]

-- | bytestring representation of 'Token'.
--
-- >>> detokenize Html (OpenTag StartTag "foo" [])
-- "<foo>"
detokenize :: Standard -> Token -> ByteString
detokenize :: Standard -> Token -> ByteString
detokenize Standard
s = \case
  (OpenTag OpenTagType
StartTag ByteString
n []) -> [i|<#{n}>|]
  (OpenTag OpenTagType
StartTag ByteString
n [Attr]
as) -> [i|<#{n}#{renderAttrs as}>|]
  (OpenTag OpenTagType
EmptyElemTag ByteString
n [Attr]
as) ->
    ByteString -> ByteString -> Bool -> ByteString
forall a. a -> a -> Bool -> a
bool
      [i|<#{n}#{renderAttrs as}/>|]
      [i|<#{n}#{renderAttrs as} />|]
      (Standard
s Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Html)
  (EndTag ByteString
n) -> [i|</#{n}>|]
  (Content ByteString
t) -> ByteString
t
  (Comment ByteString
t) -> [i|<!--#{t}-->|]
  (Doctype ByteString
t) -> [i|<!#{t}>|]
  (Decl ByteString
t [Attr]
as) -> ByteString -> ByteString -> Bool -> ByteString
forall a. a -> a -> Bool -> a
bool [i|<?#{t}#{renderAttrs as}?>|] [i|<!#{t}!>|] (Standard
s Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Html)

-- | @Indented 0@ puts newlines in between the tags.
data RenderStyle = Compact | Indented Int deriving (RenderStyle -> RenderStyle -> Bool
(RenderStyle -> RenderStyle -> Bool)
-> (RenderStyle -> RenderStyle -> Bool) -> Eq RenderStyle
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: RenderStyle -> RenderStyle -> Bool
== :: RenderStyle -> RenderStyle -> Bool
$c/= :: RenderStyle -> RenderStyle -> Bool
/= :: RenderStyle -> RenderStyle -> Bool
Eq, Int -> RenderStyle -> ShowS
[RenderStyle] -> ShowS
RenderStyle -> String
(Int -> RenderStyle -> ShowS)
-> (RenderStyle -> String)
-> ([RenderStyle] -> ShowS)
-> Show RenderStyle
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> RenderStyle -> ShowS
showsPrec :: Int -> RenderStyle -> ShowS
$cshow :: RenderStyle -> String
show :: RenderStyle -> String
$cshowList :: [RenderStyle] -> ShowS
showList :: [RenderStyle] -> ShowS
Show, (forall x. RenderStyle -> Rep RenderStyle x)
-> (forall x. Rep RenderStyle x -> RenderStyle)
-> Generic RenderStyle
forall x. Rep RenderStyle x -> RenderStyle
forall x. RenderStyle -> Rep RenderStyle x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. RenderStyle -> Rep RenderStyle x
from :: forall x. RenderStyle -> Rep RenderStyle x
$cto :: forall x. Rep RenderStyle x -> RenderStyle
to :: forall x. Rep RenderStyle x -> RenderStyle
Generic)

indentChildren :: RenderStyle -> [ByteString] -> [ByteString]
indentChildren :: RenderStyle -> [ByteString] -> [ByteString]
indentChildren RenderStyle
Compact = [ByteString] -> [ByteString]
forall a. a -> a
id
indentChildren (Indented Int
x) =
  (ByteString -> ByteString) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Char -> ByteString
B.replicate Int
x Char
' ' <>)

finalConcat :: RenderStyle -> [ByteString] -> ByteString
finalConcat :: RenderStyle -> [ByteString] -> ByteString
finalConcat RenderStyle
Compact = [ByteString] -> ByteString
forall a. Monoid a => [a] -> a
mconcat
finalConcat (Indented Int
_) =
  ByteString -> [ByteString] -> ByteString
B.intercalate (Char -> ByteString
B.singleton Char
'\n')
    ([ByteString] -> ByteString)
-> ([ByteString] -> [ByteString]) -> [ByteString] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= ByteString
"")

-- | Convert 'Markup' to bytestrings
--
-- >>> markdown (Indented 4) Html (markup_ Html [i|<foo><br></foo>|])
-- That "<foo>\n    <br>\n</foo>"
markdown :: RenderStyle -> Standard -> Markup -> Warn ByteString
markdown :: RenderStyle -> Standard -> Markup -> Warn ByteString
markdown RenderStyle
r Standard
s Markup
m = ([ByteString] -> ByteString)
-> Warn [ByteString] -> Warn ByteString
forall b c a. (b -> c) -> These a b -> These a c
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (RenderStyle -> [ByteString] -> ByteString
finalConcat RenderStyle
r) (Warn [ByteString] -> Warn ByteString)
-> Warn [ByteString] -> Warn ByteString
forall a b. (a -> b) -> a -> b
$ [Warn [ByteString]] -> Warn [ByteString]
forall a. [Warn [a]] -> Warn [a]
concatWarns ([Warn [ByteString]] -> Warn [ByteString])
-> [Warn [ByteString]] -> Warn [ByteString]
forall a b. (a -> b) -> a -> b
$ (Token -> [Warn [ByteString]] -> Warn [ByteString])
-> Element -> Warn [ByteString]
forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree (RenderStyle
-> Standard -> Token -> [Warn [ByteString]] -> Warn [ByteString]
renderBranch RenderStyle
r Standard
s) (Element -> Warn [ByteString]) -> [Element] -> [Warn [ByteString]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Markup -> [Element]
elements (Markup -> [Element]) -> Markup -> [Element]
forall a b. (a -> b) -> a -> b
$ Markup -> Markup
normContent Markup
m)

-- | Convert 'Markup' to 'ByteString' and error on warnings.
--
-- >>> B.putStr $ markdown_ (Indented 4) Html (markup_ Html [i|<foo><br></foo>|])
-- <foo>
--     <br>
-- </foo>
markdown_ :: RenderStyle -> Standard -> Markup -> ByteString
markdown_ :: RenderStyle -> Standard -> Markup -> ByteString
markdown_ RenderStyle
r Standard
s = RenderStyle -> Standard -> Markup -> Warn ByteString
markdown RenderStyle
r Standard
s (Markup -> Warn ByteString)
-> (Warn ByteString -> ByteString) -> Markup -> ByteString
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> Warn ByteString -> ByteString
forall a. Warn a -> a
warnError

-- note that renderBranch adds in EndTags for StartTags when needed
renderBranch :: RenderStyle -> Standard -> Token -> [Warn [ByteString]] -> Warn [ByteString]
renderBranch :: RenderStyle
-> Standard -> Token -> [Warn [ByteString]] -> Warn [ByteString]
renderBranch RenderStyle
r Standard
std s :: Token
s@(OpenTag OpenTagType
StartTag ByteString
n [Attr]
_) [Warn [ByteString]]
xs
  | ByteString
n ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
selfClosers Bool -> Bool -> Bool
&& Standard
std Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Html =
      [ByteString] -> Warn [ByteString]
forall a b. b -> These a b
That [Standard -> Token -> ByteString
detokenize Standard
std Token
s] Warn [ByteString] -> Warn [ByteString] -> Warn [ByteString]
forall a. Semigroup a => a -> a -> a
<> ([ByteString] -> [ByteString])
-> Warn [ByteString] -> Warn [ByteString]
forall b c a. (b -> c) -> These a b -> These a c
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (RenderStyle -> [ByteString] -> [ByteString]
indentChildren RenderStyle
r) ([Warn [ByteString]] -> Warn [ByteString]
forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [ByteString]]
xs)
  | Bool
otherwise =
      [ByteString] -> Warn [ByteString]
forall a b. b -> These a b
That [Standard -> Token -> ByteString
detokenize Standard
std Token
s] Warn [ByteString] -> Warn [ByteString] -> Warn [ByteString]
forall a. Semigroup a => a -> a -> a
<> ([ByteString] -> [ByteString])
-> Warn [ByteString] -> Warn [ByteString]
forall b c a. (b -> c) -> These a b -> These a c
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (RenderStyle -> [ByteString] -> [ByteString]
indentChildren RenderStyle
r) ([Warn [ByteString]] -> Warn [ByteString]
forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [ByteString]]
xs) Warn [ByteString] -> Warn [ByteString] -> Warn [ByteString]
forall a. Semigroup a => a -> a -> a
<> [ByteString] -> Warn [ByteString]
forall a b. b -> These a b
That [Standard -> Token -> ByteString
detokenize Standard
std (ByteString -> Token
EndTag ByteString
n)]
renderBranch RenderStyle
_ Standard
std Token
x [] =
  [ByteString] -> Warn [ByteString]
forall a b. b -> These a b
That [Standard -> Token -> ByteString
detokenize Standard
std Token
x]
renderBranch RenderStyle
r Standard
std Token
x [Warn [ByteString]]
xs =
  [MarkupWarning] -> [ByteString] -> Warn [ByteString]
forall a b. a -> b -> These a b
These [MarkupWarning
LeafWithChildren] [Standard -> Token -> ByteString
detokenize Standard
std Token
x] Warn [ByteString] -> Warn [ByteString] -> Warn [ByteString]
forall a. Semigroup a => a -> a -> a
<> ([ByteString] -> [ByteString])
-> Warn [ByteString] -> Warn [ByteString]
forall b c a. (b -> c) -> These a b -> These a c
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (RenderStyle -> [ByteString] -> [ByteString]
indentChildren RenderStyle
r) ([Warn [ByteString]] -> Warn [ByteString]
forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [ByteString]]
xs)

-- | Normalise Content in Markup, concatenating adjacent Content, and removing mempty Content.
--
-- >>> normContent $ content "a" <> content "" <> content "b"
-- Markup {elements = [Node {rootLabel = Content "ab", subForest = []}]}
normContent :: Markup -> Markup
normContent :: Markup -> Markup
normContent (Markup [Element]
trees) = [Element] -> Markup
Markup ([Element] -> Markup) -> [Element] -> Markup
forall a b. (a -> b) -> a -> b
$ (Token -> [Element] -> Element) -> Element -> Element
forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree (\Token
x [Element]
xs -> Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
x ((Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter ((Token -> Token -> Bool
forall a. Eq a => a -> a -> Bool
/= ByteString -> Token
Content ByteString
"") (Token -> Bool) -> (Element -> Token) -> Element -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Token
forall a. Tree a -> a
rootLabel) ([Element] -> [Element]) -> [Element] -> [Element]
forall a b. (a -> b) -> a -> b
$ [Element] -> [Element]
concatContent [Element]
xs)) (Element -> Element) -> [Element] -> [Element]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Element] -> [Element]
concatContent [Element]
trees

concatContent :: [Tree Token] -> [Tree Token]
concatContent :: [Element] -> [Element]
concatContent = \case
  ((Node (Content ByteString
t) [Element]
_) : (Node (Content ByteString
t') [Element]
_) : [Element]
ts) -> [Element] -> [Element]
concatContent ([Element] -> [Element]) -> [Element] -> [Element]
forall a b. (a -> b) -> a -> b
$ Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node (ByteString -> Token
Content (ByteString
t ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
t')) [] Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ts
  (Element
t : [Element]
ts) -> Element
t Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element] -> [Element]
concatContent [Element]
ts
  [] -> []

-- | Gather together token trees from a token list, placing child elements in nodes and removing EndTags.
--
-- >>> gather Html =<< tokenize Html "<foo class=\"bar\">baz</foo>"
-- That (Markup {elements = [Node {rootLabel = OpenTag StartTag "foo" [Attr {attrName = "class", attrValue = "bar"}], subForest = [Node {rootLabel = Content "baz", subForest = []}]}]})
gather :: Standard -> [Token] -> Warn Markup
gather :: Standard -> [Token] -> Warn Markup
gather Standard
s [Token]
ts = ([Element] -> Markup)
-> These [MarkupWarning] [Element] -> Warn Markup
forall b c a. (b -> c) -> These a b -> These a c
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second [Element] -> Markup
Markup (These [MarkupWarning] [Element] -> Warn Markup)
-> These [MarkupWarning] [Element] -> Warn Markup
forall a b. (a -> b) -> a -> b
$
  case ([Element]
finalSibs, [(Token, [Element])]
finalParents, [MarkupWarning]
warnings) of
    ([Element]
sibs, [], []) -> [Element] -> These [MarkupWarning] [Element]
forall a b. b -> These a b
That ([Element] -> [Element]
forall a. [a] -> [a]
reverse [Element]
sibs)
    ([], [], [MarkupWarning]
xs) -> [MarkupWarning] -> These [MarkupWarning] [Element]
forall a b. a -> These a b
This [MarkupWarning]
xs
    ([Element]
sibs, [(Token, [Element])]
ps, [MarkupWarning]
xs) ->
      [MarkupWarning] -> [Element] -> These [MarkupWarning] [Element]
forall a b. a -> b -> These a b
These ([MarkupWarning]
xs [MarkupWarning] -> [MarkupWarning] -> [MarkupWarning]
forall a. Semigroup a => a -> a -> a
<> [MarkupWarning
UnclosedTag]) ([Element] -> [Element]
forall a. [a] -> [a]
reverse ([Element] -> [Element]) -> [Element] -> [Element]
forall a b. (a -> b) -> a -> b
$ ([Element] -> (Token, [Element]) -> [Element])
-> [Element] -> [(Token, [Element])] -> [Element]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\[Element]
ss' (Token
p, [Element]
ss) -> Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
p ([Element] -> [Element]
forall a. [a] -> [a]
reverse [Element]
ss') Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ss) [Element]
sibs [(Token, [Element])]
ps)
  where
    (Cursor [Element]
finalSibs [(Token, [Element])]
finalParents, [MarkupWarning]
warnings) =
      ((Cursor, [MarkupWarning]) -> Token -> (Cursor, [MarkupWarning]))
-> (Cursor, [MarkupWarning])
-> [Token]
-> (Cursor, [MarkupWarning])
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\(Cursor
c, [MarkupWarning]
xs) Token
t -> Standard -> Token -> Cursor -> (Cursor, Maybe MarkupWarning)
incCursor Standard
s Token
t Cursor
c (Cursor, Maybe MarkupWarning)
-> ((Cursor, Maybe MarkupWarning) -> (Cursor, [MarkupWarning]))
-> (Cursor, [MarkupWarning])
forall a b. a -> (a -> b) -> b
& (Maybe MarkupWarning -> [MarkupWarning])
-> (Cursor, Maybe MarkupWarning) -> (Cursor, [MarkupWarning])
forall b c a. (b -> c) -> (a, b) -> (a, c)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second (Maybe MarkupWarning -> [MarkupWarning]
forall a. Maybe a -> [a]
maybeToList (Maybe MarkupWarning -> [MarkupWarning])
-> ([MarkupWarning] -> [MarkupWarning])
-> Maybe MarkupWarning
-> [MarkupWarning]
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> ([MarkupWarning] -> [MarkupWarning] -> [MarkupWarning]
forall a. Semigroup a => a -> a -> a
<> [MarkupWarning]
xs))) ([Element] -> [(Token, [Element])] -> Cursor
Cursor [] [], []) [Token]
ts

-- | 'gather' but errors on warnings.
gather_ :: Standard -> [Token] -> Markup
gather_ :: Standard -> [Token] -> Markup
gather_ Standard
s [Token]
ts = Standard -> [Token] -> Warn Markup
gather Standard
s [Token]
ts Warn Markup -> (Warn Markup -> Markup) -> Markup
forall a b. a -> (a -> b) -> b
& Warn Markup -> Markup
forall a. Warn a -> a
warnError

incCursor :: Standard -> Token -> Cursor -> (Cursor, Maybe MarkupWarning)
-- Only StartTags are ever pushed on to the parent list, here:
incCursor :: Standard -> Token -> Cursor -> (Cursor, Maybe MarkupWarning)
incCursor Standard
Xml t :: Token
t@(OpenTag OpenTagType
StartTag ByteString
_ [Attr]
_) (Cursor [Element]
ss [(Token, [Element])]
ps) = ([Element] -> [(Token, [Element])] -> Cursor
Cursor [] ((Token
t, [Element]
ss) (Token, [Element]) -> [(Token, [Element])] -> [(Token, [Element])]
forall a. a -> [a] -> [a]
: [(Token, [Element])]
ps), Maybe MarkupWarning
forall a. Maybe a
Nothing)
incCursor Standard
Html t :: Token
t@(OpenTag OpenTagType
StartTag ByteString
n [Attr]
_) (Cursor [Element]
ss [(Token, [Element])]
ps) =
  (Cursor -> Cursor -> Bool -> Cursor
forall a. a -> a -> Bool -> a
bool ([Element] -> [(Token, [Element])] -> Cursor
Cursor [] ((Token
t, [Element]
ss) (Token, [Element]) -> [(Token, [Element])] -> [(Token, [Element])]
forall a. a -> [a] -> [a]
: [(Token, [Element])]
ps)) ([Element] -> [(Token, [Element])] -> Cursor
Cursor (Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
t [] Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ss) [(Token, [Element])]
ps) (ByteString
n ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
selfClosers), Maybe MarkupWarning
forall a. Maybe a
Nothing)
incCursor Standard
Xml t :: Token
t@(OpenTag OpenTagType
EmptyElemTag ByteString
_ [Attr]
_) (Cursor [Element]
ss [(Token, [Element])]
ps) = ([Element] -> [(Token, [Element])] -> Cursor
Cursor (Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
t [] Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ss) [(Token, [Element])]
ps, Maybe MarkupWarning
forall a. Maybe a
Nothing)
incCursor Standard
Html t :: Token
t@(OpenTag OpenTagType
EmptyElemTag ByteString
n [Attr]
_) (Cursor [Element]
ss [(Token, [Element])]
ps) =
  ( [Element] -> [(Token, [Element])] -> Cursor
Cursor (Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
t [] Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ss) [(Token, [Element])]
ps,
    Maybe MarkupWarning
-> Maybe MarkupWarning -> Bool -> Maybe MarkupWarning
forall a. a -> a -> Bool -> a
bool (MarkupWarning -> Maybe MarkupWarning
forall a. a -> Maybe a
Just MarkupWarning
BadEmptyElemTag) Maybe MarkupWarning
forall a. Maybe a
Nothing (ByteString
n ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
selfClosers)
  )
incCursor Standard
_ (EndTag ByteString
n) (Cursor [Element]
ss ((p :: Token
p@(OpenTag OpenTagType
StartTag ByteString
n' [Attr]
_), [Element]
ss') : [(Token, [Element])]
ps)) =
  ( [Element] -> [(Token, [Element])] -> Cursor
Cursor (Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
p ([Element] -> [Element]
forall a. [a] -> [a]
reverse [Element]
ss) Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ss') [(Token, [Element])]
ps,
    Maybe MarkupWarning
-> Maybe MarkupWarning -> Bool -> Maybe MarkupWarning
forall a. a -> a -> Bool -> a
bool (MarkupWarning -> Maybe MarkupWarning
forall a. a -> Maybe a
Just (ByteString -> ByteString -> MarkupWarning
TagMismatch ByteString
n ByteString
n')) Maybe MarkupWarning
forall a. Maybe a
Nothing (ByteString
n ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
n')
  )
-- Non-StartTag on parent list
incCursor Standard
_ (EndTag ByteString
_) (Cursor [Element]
ss ((Token
p, [Element]
ss') : [(Token, [Element])]
ps)) =
  ( [Element] -> [(Token, [Element])] -> Cursor
Cursor (Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
p ([Element] -> [Element]
forall a. [a] -> [a]
reverse [Element]
ss) Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ss') [(Token, [Element])]
ps,
    MarkupWarning -> Maybe MarkupWarning
forall a. a -> Maybe a
Just MarkupWarning
LeafWithChildren
  )
incCursor Standard
_ (EndTag ByteString
_) (Cursor [Element]
ss []) =
  ( [Element] -> [(Token, [Element])] -> Cursor
Cursor [Element]
ss [],
    MarkupWarning -> Maybe MarkupWarning
forall a. a -> Maybe a
Just MarkupWarning
UnmatchedEndTag
  )
incCursor Standard
_ Token
t (Cursor [Element]
ss [(Token, [Element])]
ps) = ([Element] -> [(Token, [Element])] -> Cursor
Cursor (Token -> [Element] -> Element
forall a. a -> [Tree a] -> Tree a
Node Token
t [] Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
: [Element]
ss) [(Token, [Element])]
ps, Maybe MarkupWarning
forall a. Maybe a
Nothing)

data Cursor = Cursor
  { -- siblings, not (yet) part of another element
    Cursor -> [Element]
_sibs :: [Tree Token],
    -- open elements and their siblings.
    Cursor -> [(Token, [Element])]
_stack :: [(Token, [Tree Token])]
  }

-- | Convert a markup into a token list, adding end tags.
--
-- >>> degather Html =<< markup Html "<foo class=\"bar\">baz</foo>"
-- That [OpenTag StartTag "foo" [Attr {attrName = "class", attrValue = "bar"}],Content "baz",EndTag "foo"]
degather :: Standard -> Markup -> Warn [Token]
degather :: Standard -> Markup -> Warn [Token]
degather Standard
s (Markup [Element]
tree) = [Warn [Token]] -> Warn [Token]
forall a. [Warn [a]] -> Warn [a]
concatWarns ([Warn [Token]] -> Warn [Token]) -> [Warn [Token]] -> Warn [Token]
forall a b. (a -> b) -> a -> b
$ (Token -> [Warn [Token]] -> Warn [Token])
-> Element -> Warn [Token]
forall a b. (a -> [b] -> b) -> Tree a -> b
foldTree (Standard -> Token -> [Warn [Token]] -> Warn [Token]
addCloseTags Standard
s) (Element -> Warn [Token]) -> [Element] -> [Warn [Token]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Element]
tree

-- | 'degather' but errors on warning
degather_ :: Standard -> Markup -> [Token]
degather_ :: Standard -> Markup -> [Token]
degather_ Standard
s Markup
m = Standard -> Markup -> Warn [Token]
degather Standard
s Markup
m Warn [Token] -> (Warn [Token] -> [Token]) -> [Token]
forall a b. a -> (a -> b) -> b
& Warn [Token] -> [Token]
forall a. Warn a -> a
warnError

concatWarns :: [Warn [a]] -> Warn [a]
concatWarns :: forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [a]]
rs = case ([[MarkupWarning]] -> [MarkupWarning])
-> ([[a]] -> [a])
-> ([[MarkupWarning]], [[a]])
-> ([MarkupWarning], [a])
forall a b c d. (a -> b) -> (c -> d) -> (a, c) -> (b, d)
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap [[MarkupWarning]] -> [MarkupWarning]
forall a. Monoid a => [a] -> a
mconcat [[a]] -> [a]
forall a. Monoid a => [a] -> a
mconcat (([[MarkupWarning]], [[a]]) -> ([MarkupWarning], [a]))
-> ([[MarkupWarning]], [[a]]) -> ([MarkupWarning], [a])
forall a b. (a -> b) -> a -> b
$ [Warn [a]] -> ([[MarkupWarning]], [[a]])
forall a b. [These a b] -> ([a], [b])
partitionHereThere [Warn [a]]
rs of
  ([], [a]
xs) -> [a] -> Warn [a]
forall a b. b -> These a b
That [a]
xs
  ([MarkupWarning]
es, []) -> [MarkupWarning] -> Warn [a]
forall a b. a -> These a b
This [MarkupWarning]
es
  ([MarkupWarning]
es, [a]
xs) -> [MarkupWarning] -> [a] -> Warn [a]
forall a b. a -> b -> These a b
These [MarkupWarning]
es [a]
xs

addCloseTags :: Standard -> Token -> [Warn [Token]] -> Warn [Token]
addCloseTags :: Standard -> Token -> [Warn [Token]] -> Warn [Token]
addCloseTags Standard
std s :: Token
s@(OpenTag OpenTagType
StartTag ByteString
n [Attr]
_) [Warn [Token]]
children
  | [Warn [Token]]
children [Warn [Token]] -> [Warn [Token]] -> Bool
forall a. Eq a => a -> a -> Bool
/= [] Bool -> Bool -> Bool
&& ByteString
n ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
selfClosers Bool -> Bool -> Bool
&& Standard
std Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Html =
      [MarkupWarning] -> [Token] -> Warn [Token]
forall a b. a -> b -> These a b
These [MarkupWarning
SelfCloserWithChildren] [Token
s] Warn [Token] -> Warn [Token] -> Warn [Token]
forall a. Semigroup a => a -> a -> a
<> [Warn [Token]] -> Warn [Token]
forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [Token]]
children
  | ByteString
n ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
selfClosers Bool -> Bool -> Bool
&& Standard
std Standard -> Standard -> Bool
forall a. Eq a => a -> a -> Bool
== Standard
Html =
      [Token] -> Warn [Token]
forall a b. b -> These a b
That [Token
s] Warn [Token] -> Warn [Token] -> Warn [Token]
forall a. Semigroup a => a -> a -> a
<> [Warn [Token]] -> Warn [Token]
forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [Token]]
children
  | Bool
otherwise =
      [Token] -> Warn [Token]
forall a b. b -> These a b
That [Token
s] Warn [Token] -> Warn [Token] -> Warn [Token]
forall a. Semigroup a => a -> a -> a
<> [Warn [Token]] -> Warn [Token]
forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [Token]]
children Warn [Token] -> Warn [Token] -> Warn [Token]
forall a. Semigroup a => a -> a -> a
<> [Token] -> Warn [Token]
forall a b. b -> These a b
That [ByteString -> Token
EndTag ByteString
n]
addCloseTags Standard
_ Token
x [Warn [Token]]
xs = case [Warn [Token]]
xs of
  [] -> [Token] -> Warn [Token]
forall a b. b -> These a b
That [Token
x]
  [Warn [Token]]
cs -> [MarkupWarning] -> [Token] -> Warn [Token]
forall a b. a -> b -> These a b
These [MarkupWarning
LeafWithChildren] [Token
x] Warn [Token] -> Warn [Token] -> Warn [Token]
forall a. Semigroup a => a -> a -> a
<> [Warn [Token]] -> Warn [Token]
forall a. [Warn [a]] -> Warn [a]
concatWarns [Warn [Token]]
cs

tokenXmlP :: Parser e Token
tokenXmlP :: forall e. Parser e Token
tokenXmlP =
  $( switch
       [|
         case _ of
           "<!--" -> commentP
           "<!" -> doctypeXmlP
           "</" -> endTagXmlP
           "<?" -> declXmlP
           "<" -> startTagsXmlP
           _ -> contentP
         |]
   )

-- [4]
nameStartCharP :: Parser e Char
nameStartCharP :: forall e. Parser e Char
nameStartCharP = (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT PureMode e Char
forall (st :: ZeroBitType) e.
(Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT st e Char
fusedSatisfy Char -> Bool
isLatinLetter Char -> Bool
isNameStartChar Char -> Bool
isNameStartChar Char -> Bool
isNameStartChar

isNameStartChar :: Char -> Bool
isNameStartChar :: Char -> Bool
isNameStartChar Char
x =
  (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD6')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

-- [4a]
nameCharP :: Parser e Char
nameCharP :: forall e. Parser e Char
nameCharP = (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT PureMode e Char
forall (st :: ZeroBitType) e.
(Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> (Char -> Bool)
-> ParserT st e Char
fusedSatisfy Char -> Bool
isNameCharAscii Char -> Bool
isNameCharExt Char -> Bool
isNameCharExt Char -> Bool
isNameCharExt

isNameCharAscii :: Char -> Bool
isNameCharAscii :: Char -> Bool
isNameCharAscii Char
x =
  (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.')

isNameCharExt :: Char -> Bool
isNameCharExt :: Char -> Bool
isNameCharExt Char
x =
  (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'0' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\xB7')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD6')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x300' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x36F')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x203F' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
    Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
x Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')

-- | name string according to xml production rule [5]
nameXmlP :: Parser e ByteString
nameXmlP :: forall e. Parser e ByteString
nameXmlP = ParserT PureMode e String -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (Parser e Char
forall e. Parser e Char
nameStartCharP Parser e Char
-> ParserT PureMode e String -> ParserT PureMode e String
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser e Char -> ParserT PureMode e String
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser e Char
forall e. Parser e Char
nameCharP)

commentCloseP :: Parser e ()
commentCloseP :: forall e. Parser e ()
commentCloseP = $(string "-->")

charNotMinusP :: Parser e ByteString
charNotMinusP :: forall e. Parser e ByteString
charNotMinusP = ParserT PureMode e Char -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e Char -> ParserT PureMode e ByteString)
-> ParserT PureMode e Char -> ParserT PureMode e ByteString
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'-')

minusPlusCharP :: Parser e ByteString
minusPlusCharP :: forall e. Parser e ByteString
minusPlusCharP = ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf ($(char '-') ParserT PureMode e ()
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParserT PureMode e ByteString
forall e. Parser e ByteString
charNotMinusP)

commentP :: Parser e Token
commentP :: forall e. Parser e Token
commentP = ByteString -> Token
Comment (ByteString -> Token)
-> ParserT PureMode e ByteString -> ParserT PureMode e Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e [ByteString] -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e ByteString -> ParserT PureMode e [ByteString]
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (ParserT PureMode e ByteString
forall e. Parser e ByteString
charNotMinusP ParserT PureMode e ByteString
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> ParserT PureMode e ByteString
forall e. Parser e ByteString
minusPlusCharP)) ParserT PureMode e Token
-> ParserT PureMode e () -> ParserT PureMode e Token
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParserT PureMode e ()
forall e. Parser e ()
commentCloseP

contentP :: Parser e Token
contentP :: forall e. Parser e Token
contentP = ByteString -> Token
Content (ByteString -> Token)
-> ParserT PureMode e ByteString -> ParserT PureMode e Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e String -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e Char -> ParserT PureMode e String
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'<')))

-- | XML declaration as per production rule [23]
declXmlP :: Parser e Token
declXmlP :: forall e. Parser e Token
declXmlP = do
  ()
_ <- $(string "xml")
  Attr
av <- ByteString -> ByteString -> Attr
Attr ByteString
"version" (ByteString -> Attr)
-> ParserT PureMode e ByteString -> ParserT PureMode e Attr
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e ByteString
forall e. Parser e ByteString
xmlVersionInfoP
  Attr
en <- ByteString -> ByteString -> Attr
Attr ByteString
"encoding" (ByteString -> Attr)
-> ParserT PureMode e ByteString -> ParserT PureMode e Attr
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e ByteString
forall e. Parser e ByteString
xmlEncodingDeclP
  Maybe Attr
st <- ParserT PureMode e Attr -> ParserT PureMode e (Maybe Attr)
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional (ParserT PureMode e Attr -> ParserT PureMode e (Maybe Attr))
-> ParserT PureMode e Attr -> ParserT PureMode e (Maybe Attr)
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> Attr
Attr ByteString
"standalone" (ByteString -> Attr)
-> ParserT PureMode e ByteString -> ParserT PureMode e Attr
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e ByteString
forall e. Parser e ByteString
xmlStandaloneP
  ()
_ <- ParserT PureMode e ()
forall e. Parser e ()
ws_
  ()
_ <- $(string "?>")
  Token -> Parser e Token
forall a. a -> ParserT PureMode e a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Token -> Parser e Token) -> Token -> Parser e Token
forall a b. (a -> b) -> a -> b
$ ByteString -> [Attr] -> Token
Decl ByteString
"xml" ([Attr] -> Token) -> [Attr] -> Token
forall a b. (a -> b) -> a -> b
$ [Attr
av, Attr
en] [Attr] -> [Attr] -> [Attr]
forall a. Semigroup a => a -> a -> a
<> Maybe Attr -> [Attr]
forall a. Maybe a -> [a]
maybeToList Maybe Attr
st

-- | xml production [24]
xmlVersionInfoP :: Parser e ByteString
xmlVersionInfoP :: forall e. Parser e ByteString
xmlVersionInfoP = ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e ByteString -> ParserT PureMode e ByteString)
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b. (a -> b) -> a -> b
$ Parser e ()
forall e. Parser e ()
ws_ Parser e () -> Parser e () -> Parser e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> $(string "version") Parser e () -> Parser e () -> Parser e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser e ()
forall e. Parser e ()
eq Parser e ()
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall e a. Parser e a -> Parser e a
wrappedQNoGuard ParserT PureMode e ByteString
forall e. Parser e ByteString
xmlVersionNumP

-- | xml production [26]
xmlVersionNumP :: Parser e ByteString
xmlVersionNumP :: forall e. Parser e ByteString
xmlVersionNumP =
  ParserT PureMode e String -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf ($(string "1.") ParserT PureMode e ()
-> ParserT PureMode e String -> ParserT PureMode e String
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e Char -> ParserT PureMode e String
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy Char -> Bool
isDigit))

-- | Doctype declaration as per production rule [28]
doctypeXmlP :: Parser e Token
doctypeXmlP :: forall e. Parser e Token
doctypeXmlP =
  ByteString -> Token
Doctype
    (ByteString -> Token)
-> ParserT PureMode e ByteString -> ParserT PureMode e Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e () -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf
      ( $(string "DOCTYPE")
          ParserT PureMode e ()
-> ParserT PureMode e () -> ParserT PureMode e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e ()
forall e. Parser e ()
ws_
          ParserT PureMode e ()
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e ByteString
forall e. Parser e ByteString
nameXmlP
          ParserT PureMode e ByteString
-> ParserT PureMode e () -> ParserT PureMode e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
          -- optional (ws_ >> xmlExternalID) >>
          ParserT PureMode e ()
forall e. Parser e ()
ws_
          ParserT PureMode e ()
-> ParserT PureMode e (Maybe String)
-> ParserT PureMode e (Maybe String)
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e String -> ParserT PureMode e (Maybe String)
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e (Maybe a)
optional ParserT PureMode e String
forall e. Parser e String
bracketedSB
          ParserT PureMode e (Maybe String)
-> ParserT PureMode e () -> ParserT PureMode e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e ()
forall e. Parser e ()
ws_
      )
    ParserT PureMode e Token
-> ParserT PureMode e () -> ParserT PureMode e Token
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>')

-- | xml production [32]
xmlStandaloneP :: Parser e ByteString
xmlStandaloneP :: forall e. Parser e ByteString
xmlStandaloneP =
  ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e ByteString -> ParserT PureMode e ByteString)
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b. (a -> b) -> a -> b
$
    Parser e ()
forall e. Parser e ()
ws_ Parser e () -> Parser e () -> Parser e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> $(string "standalone") Parser e () -> Parser e () -> Parser e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser e ()
forall e. Parser e ()
eq Parser e ()
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParserT PureMode e ByteString
forall e. Parser e ByteString
xmlYesNoP

-- | xml yes/no
xmlYesNoP :: Parser e ByteString
xmlYesNoP :: forall e. Parser e ByteString
xmlYesNoP = Parser e ByteString -> Parser e ByteString
forall e a. Parser e a -> Parser e a
wrappedQNoGuard (ParserT PureMode e () -> Parser e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e () -> Parser e ByteString)
-> ParserT PureMode e () -> Parser e ByteString
forall a b. (a -> b) -> a -> b
$ $(string "yes") ParserT PureMode e ()
-> ParserT PureMode e () -> ParserT PureMode e ()
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> $(string "no"))

-- | xml production [80]
xmlEncodingDeclP :: Parser e ByteString
xmlEncodingDeclP :: forall e. Parser e ByteString
xmlEncodingDeclP = Parser e ()
forall e. Parser e ()
ws_ Parser e () -> Parser e () -> Parser e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> $(string "encoding") Parser e () -> Parser e () -> Parser e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser e ()
forall e. Parser e ()
eq Parser e ()
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall e a. Parser e a -> Parser e a
wrappedQNoGuard ParserT PureMode e ByteString
forall e. Parser e ByteString
xmlEncNameP

-- | xml production [81]
xmlEncNameP :: Parser e ByteString
xmlEncNameP :: forall e. Parser e ByteString
xmlEncNameP = ParserT PureMode e String -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf ((Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii Char -> Bool
isLatinLetter ParserT PureMode e Char
-> ParserT PureMode e String -> ParserT PureMode e String
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e Char -> ParserT PureMode e String
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ((Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii (\Char
x -> Char -> Bool
isLatinLetter Char
x Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
x Bool -> Bool -> Bool
|| Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
x (String
"._-" :: [Char]))))

-- | open xml tag as per xml production rule [40]
--  self-closing xml tag as per [44]
startTagsXmlP :: Parser e Token
startTagsXmlP :: forall e. Parser e Token
startTagsXmlP = do
  !ByteString
n <- Parser e ByteString
forall e. Parser e ByteString
nameXmlP
  ![Attr]
as <- ParserT PureMode e Attr -> ParserT PureMode e [Attr]
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser e ()
forall e. Parser e ()
ws_ Parser e () -> ParserT PureMode e Attr -> ParserT PureMode e Attr
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParserT PureMode e Attr
forall e. Parser e Attr
attrXmlP)
  ()
_ <- Parser e ()
forall e. Parser e ()
ws_
  $( switch
       [|
         case _ of
           "/>" -> pure (OpenTag EmptyElemTag n as)
           ">" -> pure (OpenTag StartTag n as)
         |]
   )

attrXmlP :: Parser e Attr
attrXmlP :: forall e. Parser e Attr
attrXmlP = ByteString -> ByteString -> Attr
Attr (ByteString -> ByteString -> Attr)
-> ParserT PureMode e ByteString
-> ParserT PureMode e (ByteString -> Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ParserT PureMode e ByteString
forall e. Parser e ByteString
nameXmlP ParserT PureMode e ByteString
-> ParserT PureMode e () -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParserT PureMode e ()
forall e. Parser e ()
eq) ParserT PureMode e (ByteString -> Attr)
-> ParserT PureMode e ByteString -> ParserT PureMode e Attr
forall a b.
ParserT PureMode e (a -> b)
-> ParserT PureMode e a -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParserT PureMode e ByteString
forall e. Parser e ByteString
wrappedQ

-- | closing tag as per [42]
endTagXmlP :: Parser e Token
endTagXmlP :: forall e. Parser e Token
endTagXmlP = ByteString -> Token
EndTag (ByteString -> Token)
-> ParserT PureMode e ByteString -> ParserT PureMode e Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ParserT PureMode e ByteString
forall e. Parser e ByteString
nameXmlP ParserT PureMode e ByteString
-> ParserT PureMode e () -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParserT PureMode e ()
forall e. Parser e ()
ws_ ParserT PureMode e ByteString
-> ParserT PureMode e () -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>'))

-- | Parse a single 'Token'.
tokenHtmlP :: Parser e Token
tokenHtmlP :: forall e. Parser e Token
tokenHtmlP =
  $( switch
       [|
         case _ of
           "<!--" -> commentP
           "<!" -> doctypeHtmlP
           "</" -> endTagHtmlP
           "<?" -> bogusCommentHtmlP
           "<" -> startTagsHtmlP
           _ -> contentP
         |]
   )

bogusCommentHtmlP :: Parser e Token
bogusCommentHtmlP :: forall e. Parser e Token
bogusCommentHtmlP = ByteString -> Token
Comment (ByteString -> Token)
-> ParserT PureMode e ByteString -> ParserT PureMode e Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e String -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e Char -> ParserT PureMode e String
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'<')))

doctypeHtmlP :: Parser e Token
doctypeHtmlP :: forall e. Parser e Token
doctypeHtmlP =
  ByteString -> Token
Doctype
    (ByteString -> Token)
-> ParserT PureMode e ByteString -> ParserT PureMode e Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e () -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf
      ( $(string "DOCTYPE")
          ParserT PureMode e ()
-> ParserT PureMode e () -> ParserT PureMode e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e ()
forall e. Parser e ()
ws_
          ParserT PureMode e ()
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e ByteString
forall e. Parser e ByteString
nameHtmlP
          ParserT PureMode e ByteString
-> ParserT PureMode e () -> ParserT PureMode e ()
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParserT PureMode e ()
forall e. Parser e ()
ws_
      )
    ParserT PureMode e Token
-> ParserT PureMode e () -> ParserT PureMode e Token
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>')

startTagsHtmlP :: Parser e Token
startTagsHtmlP :: forall e. Parser e Token
startTagsHtmlP = do
  ByteString
n <- Parser e ByteString
forall e. Parser e ByteString
nameHtmlP
  [Attr]
as <- Standard -> Parser e [Attr]
forall a. Standard -> Parser a [Attr]
attrsP Standard
Html
  ()
_ <- Parser e ()
forall e. Parser e ()
ws_
  $( switch
       [|
         case _ of
           "/>" -> pure (OpenTag EmptyElemTag n as)
           ">" -> pure (OpenTag StartTag n as)
         |]
   )

endTagHtmlP :: Parser e Token
endTagHtmlP :: forall e. Parser e Token
endTagHtmlP = ByteString -> Token
EndTag (ByteString -> Token)
-> ParserT PureMode e ByteString -> ParserT PureMode e Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e ByteString
forall e. Parser e ByteString
nameHtmlP ParserT PureMode e Token
-> ParserT PureMode e () -> ParserT PureMode e Token
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParserT PureMode e ()
forall e. Parser e ()
ws_ ParserT PureMode e Token
-> ParserT PureMode e () -> ParserT PureMode e Token
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* $(char '>')

-- | Parse a tag name. Each standard is slightly different.
nameP :: Standard -> Parser e ByteString
nameP :: forall e. Standard -> Parser e ByteString
nameP Standard
Html = Parser e ByteString
forall e. Parser e ByteString
nameHtmlP
nameP Standard
Xml = Parser e ByteString
forall e. Parser e ByteString
nameXmlP

nameHtmlP :: Parser e ByteString
nameHtmlP :: forall e. Parser e ByteString
nameHtmlP = do
  ParserT PureMode e String -> Parser e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (Parser e Char
forall e. Parser e Char
nameStartCharHtmlP Parser e Char
-> ParserT PureMode e String -> ParserT PureMode e String
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser e Char -> ParserT PureMode e String
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many ((Char -> Bool) -> Parser e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy Char -> Bool
isNameChar))

nameStartCharHtmlP :: Parser e Char
nameStartCharHtmlP :: forall e. Parser e Char
nameStartCharHtmlP = (Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfyAscii Char -> Bool
isLatinLetter

isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
x =
  Bool -> Bool
not
    ( Char -> Bool
isWhitespace Char
x
        Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/')
        Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'<')
        Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>')
    )

attrHtmlP :: Parser e Attr
attrHtmlP :: forall e. Parser e Attr
attrHtmlP =
  (ByteString -> ByteString -> Attr
Attr (ByteString -> ByteString -> Attr)
-> ParserT PureMode e ByteString
-> ParserT PureMode e (ByteString -> Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ParserT PureMode e ByteString
forall e. Parser e ByteString
attrNameP ParserT PureMode e ByteString
-> ParserT PureMode e () -> ParserT PureMode e ByteString
forall a b.
ParserT PureMode e a
-> ParserT PureMode e b -> ParserT PureMode e a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParserT PureMode e ()
forall e. Parser e ()
eq) ParserT PureMode e (ByteString -> Attr)
-> ParserT PureMode e ByteString -> ParserT PureMode e Attr
forall a b.
ParserT PureMode e (a -> b)
-> ParserT PureMode e a -> ParserT PureMode e b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (ParserT PureMode e ByteString
forall e. Parser e ByteString
wrappedQ ParserT PureMode e ByteString
-> ParserT PureMode e ByteString -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> ParserT PureMode e ByteString
forall e. Parser e ByteString
attrBooleanNameP))
    ParserT PureMode e Attr
-> ParserT PureMode e Attr -> ParserT PureMode e Attr
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e a -> ParserT st e a
<|> ((ByteString -> ByteString -> Attr
`Attr` ByteString
forall a. Monoid a => a
mempty) (ByteString -> Attr)
-> ParserT PureMode e ByteString -> ParserT PureMode e Attr
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParserT PureMode e ByteString
forall e. Parser e ByteString
attrBooleanNameP)

attrBooleanNameP :: Parser e ByteString
attrBooleanNameP :: forall e. Parser e ByteString
attrBooleanNameP = ParserT PureMode e String -> ParserT PureMode e ByteString
forall (st :: ZeroBitType) e a.
ParserT st e a -> ParserT st e ByteString
byteStringOf (ParserT PureMode e String -> ParserT PureMode e ByteString)
-> ParserT PureMode e String -> ParserT PureMode e ByteString
forall a b. (a -> b) -> a -> b
$ ParserT PureMode e Char -> ParserT PureMode e String
forall a. ParserT PureMode e a -> ParserT PureMode e [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> ParserT PureMode e Char
forall (st :: ZeroBitType) e. (Char -> Bool) -> ParserT st e Char
satisfy Char -> Bool
isBooleanAttrName)

-- | Parse an 'Attr'
attrP :: Standard -> Parser a Attr
attrP :: forall a. Standard -> Parser a Attr
attrP Standard
Html = Parser a Attr
forall e. Parser e Attr
attrHtmlP
attrP Standard
Xml = Parser a Attr
forall e. Parser e Attr
attrXmlP

-- | Parse attributions
attrsP :: Standard -> Parser a [Attr]
attrsP :: forall a. Standard -> Parser a [Attr]
attrsP Standard
s = ParserT PureMode a Attr -> ParserT PureMode a [Attr]
forall a. ParserT PureMode a a -> ParserT PureMode a [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser a ()
forall e. Parser e ()
ws_ Parser a () -> ParserT PureMode a Attr -> ParserT PureMode a Attr
forall a b.
ParserT PureMode a a
-> ParserT PureMode a b -> ParserT PureMode a b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Standard -> ParserT PureMode a Attr
forall a. Standard -> Parser a Attr
attrP Standard
s) ParserT PureMode a [Attr]
-> Parser a () -> ParserT PureMode a [Attr]
forall a b.
ParserT PureMode a a
-> ParserT PureMode a b -> ParserT PureMode a a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser a ()
forall e. Parser e ()
ws_

attrNameP :: Parser e ByteString
attrNameP :: forall e. Parser e ByteString
attrNameP = (Char -> Bool) -> Parser e ByteString
forall e. (Char -> Bool) -> Parser e ByteString
isa Char -> Bool
isAttrName

isAttrName :: Char -> Bool
isAttrName :: Char -> Bool
isAttrName Char
x =
  Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$
    Char -> Bool
isWhitespace Char
x
      Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/')
      Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>')
      Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'=')

isBooleanAttrName :: Char -> Bool
isBooleanAttrName :: Char -> Bool
isBooleanAttrName Char
x =
  Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$
    Char -> Bool
isWhitespace Char
x
      Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/')
      Bool -> Bool -> Bool
|| (Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>')