{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Text.HTML.Tree
(
tokensToForest
, ParseTokenForestError(..), PStack(..)
, nonClosing
, tokensFromForest
, tokensFromTree
) where
import Data.Monoid
import Data.Text (Text)
import Data.Tree
import Prelude
import Text.HTML.Parser
tokensToForest :: [Token] -> Either ParseTokenForestError (Forest Token)
tokensToForest :: [Token] -> Either ParseTokenForestError (Forest Token)
tokensToForest = PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Forest Token -> [(Token, Forest Token)] -> PStack
PStack [] [])
where
f :: PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (PStack Forest Token
ss []) [] = Forest Token -> Either ParseTokenForestError (Forest Token)
forall a b. b -> Either a b
Right (Forest Token -> Forest Token
forall a. [a] -> [a]
reverse Forest Token
ss)
f PStack
pstack [] = ParseTokenForestError
-> Either ParseTokenForestError (Forest Token)
forall a b. a -> Either a b
Left (ParseTokenForestError
-> Either ParseTokenForestError (Forest Token))
-> ParseTokenForestError
-> Either ParseTokenForestError (Forest Token)
forall a b. (a -> b) -> a -> b
$ PStack -> Maybe Token -> ParseTokenForestError
ParseTokenForestErrorBracketMismatch PStack
pstack Maybe Token
forall a. Maybe a
Nothing
f PStack
pstack (Token
t : [Token]
ts) = case Token
t of
TagOpen TagName
n [Attr]
_ -> if TagName
n TagName -> [TagName] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [TagName]
nonClosing
then PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
else PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushParent Token
t PStack
pstack) [Token]
ts
TagSelfClose {} -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
TagClose TagName
n -> (PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
`f` [Token]
ts) (PStack -> Either ParseTokenForestError (Forest Token))
-> Either ParseTokenForestError PStack
-> Either ParseTokenForestError (Forest Token)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< TagName -> PStack -> Either ParseTokenForestError PStack
popParent TagName
n PStack
pstack
ContentChar Char
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
ContentText TagName
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
Comment Builder
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
Doctype TagName
_ -> PStack -> [Token] -> Either ParseTokenForestError (Forest Token)
f (Token -> PStack -> PStack
pushFlatSibling Token
t PStack
pstack) [Token]
ts
nonClosing :: [Text]
nonClosing :: [TagName]
nonClosing = [TagName
"br", TagName
"hr", TagName
"img", TagName
"meta", TagName
"area", TagName
"base", TagName
"col", TagName
"embed", TagName
"input", TagName
"link", TagName
"param", TagName
"source", TagName
"track", TagName
"wbr"]
data ParseTokenForestError =
ParseTokenForestErrorBracketMismatch PStack (Maybe Token)
deriving (ParseTokenForestError -> ParseTokenForestError -> Bool
(ParseTokenForestError -> ParseTokenForestError -> Bool)
-> (ParseTokenForestError -> ParseTokenForestError -> Bool)
-> Eq ParseTokenForestError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: ParseTokenForestError -> ParseTokenForestError -> Bool
== :: ParseTokenForestError -> ParseTokenForestError -> Bool
$c/= :: ParseTokenForestError -> ParseTokenForestError -> Bool
/= :: ParseTokenForestError -> ParseTokenForestError -> Bool
Eq, Int -> ParseTokenForestError -> ShowS
[ParseTokenForestError] -> ShowS
ParseTokenForestError -> String
(Int -> ParseTokenForestError -> ShowS)
-> (ParseTokenForestError -> String)
-> ([ParseTokenForestError] -> ShowS)
-> Show ParseTokenForestError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> ParseTokenForestError -> ShowS
showsPrec :: Int -> ParseTokenForestError -> ShowS
$cshow :: ParseTokenForestError -> String
show :: ParseTokenForestError -> String
$cshowList :: [ParseTokenForestError] -> ShowS
showList :: [ParseTokenForestError] -> ShowS
Show)
data PStack = PStack
{ PStack -> Forest Token
_pstackToplevelSiblings :: Forest Token
, PStack -> [(Token, Forest Token)]
_pstackParents :: [(Token, Forest Token)]
}
deriving (PStack -> PStack -> Bool
(PStack -> PStack -> Bool)
-> (PStack -> PStack -> Bool) -> Eq PStack
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: PStack -> PStack -> Bool
== :: PStack -> PStack -> Bool
$c/= :: PStack -> PStack -> Bool
/= :: PStack -> PStack -> Bool
Eq, Int -> PStack -> ShowS
[PStack] -> ShowS
PStack -> String
(Int -> PStack -> ShowS)
-> (PStack -> String) -> ([PStack] -> ShowS) -> Show PStack
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PStack -> ShowS
showsPrec :: Int -> PStack -> ShowS
$cshow :: PStack -> String
show :: PStack -> String
$cshowList :: [PStack] -> ShowS
showList :: [PStack] -> ShowS
Show)
pushParent :: Token -> PStack -> PStack
pushParent :: Token -> PStack -> PStack
pushParent Token
t (PStack Forest Token
ss [(Token, Forest Token)]
ps) = Forest Token -> [(Token, Forest Token)] -> PStack
PStack [] ((Token
t, Forest Token
ss) (Token, Forest Token)
-> [(Token, Forest Token)] -> [(Token, Forest Token)]
forall a. a -> [a] -> [a]
: [(Token, Forest Token)]
ps)
popParent :: TagName -> PStack -> Either ParseTokenForestError PStack
popParent :: TagName -> PStack -> Either ParseTokenForestError PStack
popParent TagName
n (PStack Forest Token
ss ((p :: Token
p@(TagOpen TagName
n' [Attr]
_), Forest Token
ss') : [(Token, Forest Token)]
ps))
| TagName
n TagName -> TagName -> Bool
forall a. Eq a => a -> a -> Bool
== TagName
n' = PStack -> Either ParseTokenForestError PStack
forall a b. b -> Either a b
Right (PStack -> Either ParseTokenForestError PStack)
-> PStack -> Either ParseTokenForestError PStack
forall a b. (a -> b) -> a -> b
$ Forest Token -> [(Token, Forest Token)] -> PStack
PStack (Token -> Forest Token -> Tree Token
forall a. a -> [Tree a] -> Tree a
Node Token
p (Forest Token -> Forest Token
forall a. [a] -> [a]
reverse Forest Token
ss) Tree Token -> Forest Token -> Forest Token
forall a. a -> [a] -> [a]
: Forest Token
ss') [(Token, Forest Token)]
ps
popParent TagName
n PStack
pstack
= ParseTokenForestError -> Either ParseTokenForestError PStack
forall a b. a -> Either a b
Left (ParseTokenForestError -> Either ParseTokenForestError PStack)
-> ParseTokenForestError -> Either ParseTokenForestError PStack
forall a b. (a -> b) -> a -> b
$ PStack -> Maybe Token -> ParseTokenForestError
ParseTokenForestErrorBracketMismatch PStack
pstack (Token -> Maybe Token
forall a. a -> Maybe a
Just (Token -> Maybe Token) -> Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ TagName -> Token
TagClose TagName
n)
pushFlatSibling :: Token -> PStack -> PStack
pushFlatSibling :: Token -> PStack -> PStack
pushFlatSibling Token
t (PStack Forest Token
ss [(Token, Forest Token)]
ps) = Forest Token -> [(Token, Forest Token)] -> PStack
PStack (Token -> Forest Token -> Tree Token
forall a. a -> [Tree a] -> Tree a
Node Token
t [] Tree Token -> Forest Token -> Forest Token
forall a. a -> [a] -> [a]
: Forest Token
ss) [(Token, Forest Token)]
ps
tokensFromForest :: Forest Token -> [Token]
tokensFromForest :: Forest Token -> [Token]
tokensFromForest = [[Token]] -> [Token]
forall a. Monoid a => [a] -> a
mconcat ([[Token]] -> [Token])
-> (Forest Token -> [[Token]]) -> Forest Token -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Tree Token -> [Token]) -> Forest Token -> [[Token]]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Tree Token -> [Token]
tokensFromTree
tokensFromTree :: Tree Token -> [Token]
tokensFromTree :: Tree Token -> [Token]
tokensFromTree (Node o :: Token
o@(TagOpen TagName
n [Attr]
_) Forest Token
ts) | TagName
n TagName -> [TagName] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [TagName]
nonClosing
= [Token
o] [Token] -> [Token] -> [Token]
forall a. Semigroup a => a -> a -> a
<> Forest Token -> [Token]
tokensFromForest Forest Token
ts [Token] -> [Token] -> [Token]
forall a. Semigroup a => a -> a -> a
<> [TagName -> Token
TagClose TagName
n]
tokensFromTree (Node Token
t [])
= [Token
t]
tokensFromTree Tree Token
_
= String -> [Token]
forall a. HasCallStack => String -> a
error String
"renderTokenTree: leaf node with children."