{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE LambdaCase #-}
{-# OPTIONS_GHC -O2 #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Redundant id" #-}
{-# HLINT ignore "Redundant bracket" #-}
module Text.HTML.Parser
(
parseTokens
, parseTokensLazy
, token
, Token(..)
, TagName, AttrName, AttrValue
, Attr(..)
, renderTokens
, renderToken
, renderAttrs
, renderAttr
, canonicalizeTokens
) where
import Data.Char hiding (isSpace)
import Data.List (unfoldr)
import GHC.Generics
import Control.Applicative
import Data.Monoid
import Control.Monad (guard)
import Control.DeepSeq
import Data.Attoparsec.Text
import qualified Data.Attoparsec.Text.Lazy as AL
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Text.Lazy.Builder (Builder)
import qualified Data.Text.Lazy.Builder as B
import Prelude hiding (take, takeWhile)
import Text.HTML.Parser.Entities (entities)
import qualified Data.Trie as Trie
type TagName = Text
type AttrName = Text
type AttrValue = Text
data Token
= TagOpen !TagName [Attr]
| TagSelfClose !TagName [Attr]
| TagClose !TagName
| ContentText !Text
| ContentChar !Char
| !Builder
| Doctype !Text
deriving (Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Token -> ShowS
showsPrec :: Int -> Token -> ShowS
$cshow :: Token -> String
show :: Token -> String
$cshowList :: [Token] -> ShowS
showList :: [Token] -> ShowS
Show, Eq Token
Eq Token =>
(Token -> Token -> Ordering)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Token)
-> (Token -> Token -> Token)
-> Ord Token
Token -> Token -> Bool
Token -> Token -> Ordering
Token -> Token -> Token
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Token -> Token -> Ordering
compare :: Token -> Token -> Ordering
$c< :: Token -> Token -> Bool
< :: Token -> Token -> Bool
$c<= :: Token -> Token -> Bool
<= :: Token -> Token -> Bool
$c> :: Token -> Token -> Bool
> :: Token -> Token -> Bool
$c>= :: Token -> Token -> Bool
>= :: Token -> Token -> Bool
$cmax :: Token -> Token -> Token
max :: Token -> Token -> Token
$cmin :: Token -> Token -> Token
min :: Token -> Token -> Token
Ord, Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
/= :: Token -> Token -> Bool
Eq, (forall x. Token -> Rep Token x)
-> (forall x. Rep Token x -> Token) -> Generic Token
forall x. Rep Token x -> Token
forall x. Token -> Rep Token x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cfrom :: forall x. Token -> Rep Token x
from :: forall x. Token -> Rep Token x
$cto :: forall x. Rep Token x -> Token
to :: forall x. Rep Token x -> Token
Generic)
endOfFileToken :: Token
endOfFileToken :: Token
endOfFileToken = Text -> Token
ContentText Text
""
data Attr = Attr !AttrName !AttrValue
deriving (Int -> Attr -> ShowS
[Attr] -> ShowS
Attr -> String
(Int -> Attr -> ShowS)
-> (Attr -> String) -> ([Attr] -> ShowS) -> Show Attr
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Attr -> ShowS
showsPrec :: Int -> Attr -> ShowS
$cshow :: Attr -> String
show :: Attr -> String
$cshowList :: [Attr] -> ShowS
showList :: [Attr] -> ShowS
Show, Attr -> Attr -> Bool
(Attr -> Attr -> Bool) -> (Attr -> Attr -> Bool) -> Eq Attr
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Attr -> Attr -> Bool
== :: Attr -> Attr -> Bool
$c/= :: Attr -> Attr -> Bool
/= :: Attr -> Attr -> Bool
Eq, Eq Attr
Eq Attr =>
(Attr -> Attr -> Ordering)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Bool)
-> (Attr -> Attr -> Attr)
-> (Attr -> Attr -> Attr)
-> Ord Attr
Attr -> Attr -> Bool
Attr -> Attr -> Ordering
Attr -> Attr -> Attr
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Attr -> Attr -> Ordering
compare :: Attr -> Attr -> Ordering
$c< :: Attr -> Attr -> Bool
< :: Attr -> Attr -> Bool
$c<= :: Attr -> Attr -> Bool
<= :: Attr -> Attr -> Bool
$c> :: Attr -> Attr -> Bool
> :: Attr -> Attr -> Bool
$c>= :: Attr -> Attr -> Bool
>= :: Attr -> Attr -> Bool
$cmax :: Attr -> Attr -> Attr
max :: Attr -> Attr -> Attr
$cmin :: Attr -> Attr -> Attr
min :: Attr -> Attr -> Attr
Ord)
instance NFData Token where
rnf :: Token -> ()
rnf (Comment Builder
b) = Text -> ()
forall a. NFData a => a -> ()
rnf (Text -> ()) -> Text -> ()
forall a b. (a -> b) -> a -> b
$ Builder -> Text
B.toLazyText Builder
b
rnf Token
_ = ()
token :: Parser Token
token :: Parser Token
token = Parser Token
dataState
dataState :: Parser Token
dataState :: Parser Token
dataState = do
Text
content <- (Char -> Bool) -> Parser Text
takeWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'<' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'&')
if Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Bool
T.null Text
content
then Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Text -> Token
ContentText Text
content
else [Parser Token] -> Parser Token
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ Char -> Parser Char
char Char
'<' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
tagOpen
, Parser Token -> Parser Token
forall i a. Parser i a -> Parser i a
try (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
'&' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
charRef
, Char -> Token
ContentChar Char
'&' Token -> Parser Char -> Parser Token
forall a b. a -> Parser Text b -> Parser Text a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser Char
char Char
'&'
]
charRef :: Parser Token
charRef :: Parser Token
charRef = Trie Char Token -> Parser Token
go Trie Char Token
entityTrie
where
go :: Trie.Trie Char Token -> Parser Token
go :: Trie Char Token -> Parser Token
go Trie Char Token
trie = do
Char
c <- Parser Char
anyChar
case Char
c of
Char
';' -> Parser Token
-> (Token -> Parser Token) -> Maybe Token -> Parser Token
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser Token
forall a. Parser Text a
forall (f :: * -> *) a. Alternative f => f a
empty Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Trie Char Token -> Maybe Token
forall k v. Trie k v -> Maybe v
Trie.terminal Trie Char Token
trie)
Char
_ -> Trie Char Token -> Parser Token
go (Char -> Trie Char Token -> Trie Char Token
forall k v. Ord k => k -> Trie k v -> Trie k v
Trie.step Char
c Trie Char Token
trie)
entityTrie :: Trie.Trie Char Token
entityTrie :: Trie Char Token
entityTrie = [(String, Token)] -> Trie Char Token
forall k v. Ord k => [([k], v)] -> Trie k v
Trie.fromList
[ (Text -> String
T.unpack Text
name, Text -> Token
ContentText Text
expansion)
| (Text
name, Text
expansion) <- [(Text, Text)]
entities
]
tagOpen :: Parser Token
tagOpen :: Parser Token
tagOpen =
(Char -> Parser Char
char Char
'!' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
markupDeclOpen)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'/' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
endTagOpen)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'?' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
bogusComment Builder
forall a. Monoid a => a
mempty)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
tagNameOpen
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
other
where
other :: Parser Token
other = do
Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Char -> Token
ContentChar Char
'<'
endTagOpen :: Parser Token
endTagOpen :: Parser Token
endTagOpen = Parser Token
tagNameClose
isWhitespace :: Char -> Bool
isWhitespace :: Char -> Bool
isWhitespace Char
'\x09' = Bool
True
isWhitespace Char
'\x0a' = Bool
True
isWhitespace Char
'\x0c' = Bool
True
isWhitespace Char
'\x0d' = Bool
True
isWhitespace Char
' ' = Bool
True
isWhitespace Char
_ = Bool
False
orC :: (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
orC :: (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
orC Char -> Bool
f Char -> Bool
g Char
c = Char -> Bool
f Char
c Bool -> Bool -> Bool
|| Char -> Bool
g Char
c
{-# INLINE orC #-}
isC :: Char -> Char -> Bool
isC :: Char -> Char -> Bool
isC = Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
(==)
{-# INLINE isC #-}
tagNameOpen :: Parser Token
tagNameOpen :: Parser Token
tagNameOpen = do
Text
tag <- Parser Text
tagName'
Parser Token -> Parser Token
forall a. a -> a
id (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ ((Char -> Bool) -> Parser Char
satisfy Char -> Bool
isWhitespace Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [])
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'/' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [])
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag []))
tagNameClose :: Parser Token
tagNameClose :: Parser Token
tagNameClose = do
Text
tag <- Parser Text
tagName'
Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Token
TagClose Text
tag)
tagName' :: Parser Text
tagName' :: Parser Text
tagName' = do
Char
c <- Parser Char
peekChar'
Bool -> Parser Text ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Parser Text ()) -> Bool -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c
(Char -> Bool) -> Parser Text
takeWhile ((Char -> Bool) -> Parser Text) -> (Char -> Bool) -> Parser Text
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'/' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'<' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>')
selfClosingStartTag :: TagName -> [Attr] -> Parser Token
selfClosingStartTag :: Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [Attr]
attrs = do
(Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagSelfClose Text
tag [Attr]
attrs))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [Attr]
attrs
beforeAttrName :: TagName -> [Attr] -> Parser Token
beforeAttrName :: Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [Attr]
attrs = do
(Char -> Bool) -> Parser Text ()
skipWhile Char -> Bool
isWhitespace
Parser Token -> Parser Token
forall a. a -> a
id (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'/' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [Attr]
attrs)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag [Attr]
attrs))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Parser Token
attrName Text
tag [Attr]
attrs
attrName :: TagName -> [Attr] -> Parser Token
attrName :: Text -> [Attr] -> Parser Token
attrName Text
tag [Attr]
attrs = do
Text
name <- (Char -> Bool) -> Parser Text
takeWhile ((Char -> Bool) -> Parser Text) -> (Char -> Bool) -> Parser Text
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'/' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'=' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>')
Parser Token -> Parser Token
forall a. a -> a
id (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
afterAttrName Text
tag [Attr]
attrs Text
name)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'=' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
beforeAttrValue Text
tag [Attr]
attrs Text
name)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((Char -> Bool) -> Parser Char
satisfy Char -> Bool
isWhitespace Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
afterAttrName Text
tag [Attr]
attrs Text
name)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token -> Parser Token
forall i a. Parser i a -> Parser i a
try (do Maybe Char
mc <- Parser (Maybe Char)
peekChar
case Maybe Char
mc of
Just Char
c | Char -> Bool
notNameChar Char
c -> Text -> [Attr] -> Text -> Parser Token
afterAttrName Text
tag [Attr]
attrs Text
name
Maybe Char
_ -> Parser Token
forall a. Parser Text a
forall (f :: * -> *) a. Alternative f => f a
empty)
where notNameChar :: Char -> Bool
notNameChar = Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'/' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>'
afterAttrName :: TagName -> [Attr] -> AttrName -> Parser Token
afterAttrName :: Text -> [Attr] -> Text -> Parser Token
afterAttrName Text
tag [Attr]
attrs Text
name = do
(Char -> Bool) -> Parser Text ()
skipWhile Char -> Bool
isWhitespace
Parser Token -> Parser Token
forall a. a -> a
id (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'/' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag (Text -> Text -> Attr
Attr Text
name Text
T.empty Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
: [Attr]
attrs))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'=' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
beforeAttrValue Text
tag [Attr]
attrs Text
name)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag (Text -> Text -> Attr
Attr Text
name Text
T.empty Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
: [Attr]
attrs)))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Parser Token
attrName Text
tag (Text -> Text -> Attr
Attr Text
name Text
T.empty Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
: [Attr]
attrs)
beforeAttrValue :: TagName -> [Attr] -> AttrName -> Parser Token
beforeAttrValue :: Text -> [Attr] -> Text -> Parser Token
beforeAttrValue Text
tag [Attr]
attrs Text
name = do
(Char -> Bool) -> Parser Text ()
skipWhile Char -> Bool
isWhitespace
Parser Token -> Parser Token
forall a. a -> a
id (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'"' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
attrValueDQuoted Text
tag [Attr]
attrs Text
name)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'\'' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Text -> Parser Token
attrValueSQuoted Text
tag [Attr]
attrs Text
name)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag (Text -> Text -> Attr
Attr Text
name Text
T.empty Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
: [Attr]
attrs)))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> [Attr] -> Text -> Parser Token
attrValueUnquoted Text
tag [Attr]
attrs Text
name
attrValueDQuoted :: TagName -> [Attr] -> AttrName -> Parser Token
attrValueDQuoted :: Text -> [Attr] -> Text -> Parser Token
attrValueDQuoted Text
tag [Attr]
attrs Text
name = do
Text
value <- (Char -> Bool) -> Parser Text
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"')
Char
_ <- Char -> Parser Char
char Char
'"'
Text -> [Attr] -> Text -> Text -> Parser Token
afterAttrValueQuoted Text
tag [Attr]
attrs Text
name Text
value
attrValueSQuoted :: TagName -> [Attr] -> AttrName -> Parser Token
attrValueSQuoted :: Text -> [Attr] -> Text -> Parser Token
attrValueSQuoted Text
tag [Attr]
attrs Text
name = do
Text
value <- (Char -> Bool) -> Parser Text
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\'')
Char
_ <- Char -> Parser Char
char Char
'\''
Text -> [Attr] -> Text -> Text -> Parser Token
afterAttrValueQuoted Text
tag [Attr]
attrs Text
name Text
value
attrValueUnquoted :: TagName -> [Attr] -> AttrName -> Parser Token
attrValueUnquoted :: Text -> [Attr] -> Text -> Parser Token
attrValueUnquoted Text
tag [Attr]
attrs Text
name = do
Text
value <- (Char -> Bool) -> Parser Text
takeTill ((Char -> Bool) -> Parser Text) -> (Char -> Bool) -> Parser Text
forall a b. (a -> b) -> a -> b
$ Char -> Bool
isWhitespace (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'>'
Parser Token -> Parser Token
forall a. a -> a
id (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ ((Char -> Bool) -> Parser Char
satisfy Char -> Bool
isWhitespace Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag (Text -> Text -> Attr
Attr Text
name Text
value Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
: [Attr]
attrs))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag (Text -> Text -> Attr
Attr Text
name Text
value Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
: [Attr]
attrs)))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
afterAttrValueQuoted :: TagName -> [Attr] -> AttrName -> AttrValue -> Parser Token
afterAttrValueQuoted :: Text -> [Attr] -> Text -> Text -> Parser Token
afterAttrValueQuoted Text
tag [Attr]
attrs Text
name Text
value =
((Char -> Bool) -> Parser Char
satisfy Char -> Bool
isWhitespace Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
beforeAttrName Text
tag [Attr]
attrs')
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'/' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> [Attr] -> Parser Token
selfClosingStartTag Text
tag [Attr]
attrs')
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> [Attr] -> Token
TagOpen Text
tag [Attr]
attrs'))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return Token
endOfFileToken)
where attrs' :: [Attr]
attrs' = Text -> Text -> Attr
Attr Text
name Text
value Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
: [Attr]
attrs
bogusComment :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'\x00' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
bogusComment (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"\xfffd"))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Char
anyChar Parser Char -> (Char -> Parser Token) -> Parser Token
forall a b. Parser Text a -> (a -> Parser Text b) -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Char
c -> Builder -> Parser Token
bogusComment (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.singleton Char
c))
markupDeclOpen :: Parser Token
markupDeclOpen :: Parser Token
markupDeclOpen =
Parser Token -> Parser Token
forall i a. Parser i a -> Parser i a
try Parser Token
comment_
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token -> Parser Token
forall i a. Parser i a -> Parser i a
try Parser Token
docType
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
bogusComment Builder
forall a. Monoid a => a
mempty
where
comment_ :: Parser Token
comment_ = Char -> Parser Char
char Char
'-' Parser Char -> Parser Char -> Parser Char
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
commentStart
docType :: Parser Token
docType = do
Text
s <- Int -> Parser Text
take Int
7
Bool -> Parser Text ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Parser Text ()) -> Bool -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower Text
s Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"doctype"
Parser Token
doctype
commentStart :: Parser Token
= do
(Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
commentStartDash)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
forall a. Monoid a => a
mempty))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
comment Builder
forall a. Monoid a => a
mempty
commentStartDash :: Parser Token
=
(Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEnd Builder
forall a. Monoid a => a
mempty)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
forall a. Monoid a => a
mempty))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
forall a. Monoid a => a
mempty))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Char -> Builder
B.singleton Char
'-'))
comment :: Builder -> Parser Token
Builder
content0 = do
Builder
content <- Text -> Builder
B.fromText (Text -> Builder) -> Parser Text -> Parser Text Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text
takeWhile (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char -> Bool
isC Char
'-' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'\x00' (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
`orC` Char -> Char -> Bool
isC Char
'<'))
Parser Token -> Parser Token
forall a. a -> a
id (Parser Token -> Parser Token) -> Parser Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'<' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThan (Builder
content0 Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"<"))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEndDash (Builder
content0 Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'\x00' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
comment (Builder
content0 Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
B.singleton Char
'\xfffd'))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment (Builder -> Token) -> Builder -> Token
forall a b. (a -> b) -> a -> b
$ Builder
content0 Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
content))
commentLessThan :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'!' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThanBang (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"!"))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'<' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThan (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"<"))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
comment Builder
content
commentLessThanBang :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThanBangDash Builder
content)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
comment Builder
content
commentLessThanBangDash :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentLessThanBangDashDash Builder
content)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
commentEndDash Builder
content
commentLessThanBangDashDash :: Builder -> Parser Token
Builder
content =
(Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
comment Builder
content)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
comment Builder
content)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Builder -> Parser Token
commentEnd Builder
content
commentEndDash :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEnd Builder
content)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"-"))
commentEnd :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'!' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEndBang Builder
content)
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEnd (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"-"))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"--"))
commentEndBang :: Builder -> Parser Token
Builder
content = do
(Char -> Parser Char
char Char
'-' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Builder -> Parser Token
commentEndDash (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"--!"))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser Char
char Char
'>' Parser Char -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Token -> Parser Token
forall a b. Parser Text a -> Parser Text b -> Parser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder -> Token
Comment Builder
content))
Parser Token -> Parser Token -> Parser Token
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Builder -> Parser Token
comment (Builder
content Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
"--!"))
doctype :: Parser Token
doctype :: Parser Token
doctype = do
Text
content <- (Char -> Bool) -> Parser Text
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>')
Char
_ <- Char -> Parser Char
char Char
'>'
Token -> Parser Token
forall a. a -> Parser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ Text -> Token
Doctype Text
content
parseTokens :: Text -> [Token]
parseTokens :: Text -> [Token]
parseTokens = (Text -> Maybe (Token, Text)) -> Text -> [Token]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr Text -> Maybe (Token, Text)
f
where
f :: Text -> Maybe (Token, Text)
f :: Text -> Maybe (Token, Text)
f Text
t
| Text -> Bool
T.null Text
t = Maybe (Token, Text)
forall a. Maybe a
Nothing
| Bool
otherwise =
case Parser Token -> Text -> Result Token
forall a. Parser a -> Text -> Result a
parse Parser Token
token Text
t of
Done Text
rest Token
tok -> (Token, Text) -> Maybe (Token, Text)
forall a. a -> Maybe a
Just (Token
tok, Text
rest)
Partial Text -> Result Token
cont ->
case Text -> Result Token
cont Text
forall a. Monoid a => a
mempty of
Done Text
rest Token
tok -> (Token, Text) -> Maybe (Token, Text)
forall a. a -> Maybe a
Just (Token
tok, Text
rest)
Result Token
_ -> Maybe (Token, Text)
forall a. Maybe a
Nothing
Result Token
_ -> Maybe (Token, Text)
forall a. Maybe a
Nothing
parseTokensLazy :: TL.Text -> [Token]
parseTokensLazy :: Text -> [Token]
parseTokensLazy = (Text -> Maybe (Token, Text)) -> Text -> [Token]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr Text -> Maybe (Token, Text)
f
where
f :: TL.Text -> Maybe (Token, TL.Text)
f :: Text -> Maybe (Token, Text)
f Text
t
| Text -> Bool
TL.null Text
t = Maybe (Token, Text)
forall a. Maybe a
Nothing
| Bool
otherwise =
case Parser Token -> Text -> Result Token
forall a. Parser a -> Text -> Result a
AL.parse Parser Token
token Text
t of
AL.Done Text
rest Token
tok -> (Token, Text) -> Maybe (Token, Text)
forall a. a -> Maybe a
Just (Token
tok, Text
rest)
Result Token
_ -> Maybe (Token, Text)
forall a. Maybe a
Nothing
renderTokens :: [Token] -> TL.Text
renderTokens :: [Token] -> Text
renderTokens = [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> ([Token] -> [Text]) -> [Token] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> Text) -> [Token] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Text
renderToken
renderToken :: Token -> TL.Text
renderToken :: Token -> Text
renderToken = Text -> Text
TL.fromStrict (Text -> Text) -> (Token -> Text) -> Token -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> (Token -> [Text]) -> Token -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. \case
(TagOpen Text
n []) -> [Text
"<", Text
n, Text
">"]
(TagOpen Text
n [Attr]
attrs) -> [Text
"<", Text
n, Text
" ", [Attr] -> Text
renderAttrs [Attr]
attrs, Text
">"]
(TagSelfClose Text
n [Attr]
attrs) -> [Text
"<", Text
n, Text
" ", [Attr] -> Text
renderAttrs [Attr]
attrs, Text
" />"]
(TagClose Text
n) -> [Text
"</", Text
n, Text
">"]
(ContentChar Char
c) -> [Char -> Text
T.singleton Char
c]
(ContentText Text
t) -> [Text
t]
(Comment Builder
builder) -> [Text
"<!--", Text -> Text
TL.toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Builder -> Text
B.toLazyText Builder
builder, Text
"-->"]
(Doctype Text
t) -> [Text
"<!DOCTYPE", Text
t, Text
">"]
renderAttrs :: [Attr] -> Text
renderAttrs :: [Attr] -> Text
renderAttrs = [Text] -> Text
T.unwords ([Text] -> Text) -> ([Attr] -> [Text]) -> [Attr] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Attr -> Text) -> [Attr] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Attr -> Text
renderAttr ([Attr] -> [Text]) -> ([Attr] -> [Attr]) -> [Attr] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Attr] -> [Attr]
forall a. [a] -> [a]
reverse
renderAttr :: Attr -> Text
renderAttr :: Attr -> Text
renderAttr (Attr Text
k Text
v) = [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat [Text
k, Text
"=\"", Text
v, Text
"\""]
canonicalizeTokens :: [Token] -> [Token]
canonicalizeTokens :: [Token] -> [Token]
canonicalizeTokens = (Token -> Bool) -> [Token] -> [Token]
forall a. (a -> Bool) -> [a] -> [a]
filter (Token -> Token -> Bool
forall a. Eq a => a -> a -> Bool
/= Text -> Token
ContentText Text
"") ([Token] -> [Token]) -> ([Token] -> [Token]) -> [Token] -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Token] -> [Token]
meldTextTokens
meldTextTokens :: [Token] -> [Token]
meldTextTokens :: [Token] -> [Token]
meldTextTokens = [Token] -> [Token]
concatTexts ([Token] -> [Token]) -> ([Token] -> [Token]) -> [Token] -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> Token) -> [Token] -> [Token]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Token -> Token
charToText
where
charToText :: Token -> Token
charToText (ContentChar Char
c) = Text -> Token
ContentText (Char -> Text
T.singleton Char
c)
charToText Token
t = Token
t
concatTexts :: [Token] -> [Token]
concatTexts = \case
(ContentText Text
t : ContentText Text
t' : [Token]
ts) -> [Token] -> [Token]
concatTexts ([Token] -> [Token]) -> [Token] -> [Token]
forall a b. (a -> b) -> a -> b
$ Text -> Token
ContentText (Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t') Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: [Token]
ts
(Token
t : [Token]
ts) -> Token
t Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: [Token] -> [Token]
concatTexts [Token]
ts
[] -> []