{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
module Scrappy.Elem.ChainHTML where
import Scrappy.Find (findNaive)
import Scrappy.Links (maybeUsefulUrl)
import Scrappy.Elem.SimpleElemParser (elemParser)
import Scrappy.Elem.ElemHeadParse (parseOpeningTag, hrefParser)
import Scrappy.Elem.Types (Elem', ShowHTML, ElemHead, Elem, innerText'
, matches')
import Control.Monad.Trans.Maybe (MaybeT)
import Text.Parsec (ParsecT, Stream, char, (<|>), many, parserFail, parse, parserZero, string, optional)
import Control.Applicative (some, liftA2)
import Data.Functor.Identity (Identity)
import Data.Maybe (catMaybes)
nl :: Stream s m Char => ParsecT s u m ()
nl :: forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
nl = ParsecT s u m [Char] -> ParsecT s u m ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional (ParsecT s u m Char -> ParsecT s u m [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (ParsecT s u m Char -> ParsecT s u m [Char])
-> ParsecT s u m Char -> ParsecT s u m [Char]
forall a b. (a -> b) -> a -> b
$ (Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\n' ParsecT s u m Char -> ParsecT s u m Char -> ParsecT s u m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
' '))
manyHtml :: ParsecT s u m a -> ParsecT s u m [a]
manyHtml ParsecT s u m a
p = ParsecT s u m a -> ParsecT s u m [a]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (ParsecT s u m a -> ParsecT s u m [a])
-> ParsecT s u m a -> ParsecT s u m [a]
forall a b. (a -> b) -> a -> b
$ ParsecT s u m a
p ParsecT s u m a -> ParsecT s u m () -> ParsecT s u m a
forall a b. ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT s u m ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
nl
someHtml :: ParsecT s u m a -> ParsecT s u m [a]
someHtml ParsecT s u m a
p = ParsecT s u m a -> ParsecT s u m [a]
forall a. ParsecT s u m a -> ParsecT s u m [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (ParsecT s u m a -> ParsecT s u m [a])
-> ParsecT s u m a -> ParsecT s u m [a]
forall a b. (a -> b) -> a -> b
$ ParsecT s u m a
p ParsecT s u m a -> ParsecT s u m () -> ParsecT s u m a
forall a b. ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT s u m ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
nl
manyTillHtml_ :: ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
manyTillHtml_ ParsecT s u m a
p ParsecT s u m end
end = ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
forall s u (m :: * -> *) a end.
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
manyTill_ (ParsecT s u m a
p ParsecT s u m a -> ParsecT s u m () -> ParsecT s u m a
forall a b. ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT s u m ()
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
nl) ParsecT s u m end
end
htmlTag :: Stream s m Char => ParsecT s u m ElemHead
htmlTag :: forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ElemHead
htmlTag = Maybe [[Char]]
-> [([Char], Maybe [Char])] -> ParsecT s u m ElemHead
forall s (m :: * -> *) u.
Stream s m Char =>
Maybe [[Char]]
-> [([Char], Maybe [Char])] -> ParsecT s u m ElemHead
parseOpeningTag ([[Char]] -> Maybe [[Char]]
forall a. a -> Maybe a
Just [[Char]
"html"]) []
manyTill_ :: ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
manyTill_ :: forall s u (m :: * -> *) a end.
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
manyTill_ ParsecT s u m a
p ParsecT s u m end
end = ParsecT s u m ([a], end)
go
where
go :: ParsecT s u m ([a], end)
go = (([],) (end -> ([a], end))
-> ParsecT s u m end -> ParsecT s u m ([a], end)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT s u m end
end) ParsecT s u m ([a], end)
-> ParsecT s u m ([a], end) -> ParsecT s u m ([a], end)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (a -> ([a], end) -> ([a], end))
-> ParsecT s u m a
-> ParsecT s u m ([a], end)
-> ParsecT s u m ([a], end)
forall a b c.
(a -> b -> c)
-> ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 (\a
x ([a]
xs, end
y) -> (a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
xs, end
y)) ParsecT s u m a
p ParsecT s u m ([a], end)
go
clean :: String -> String
clean :: [Char] -> [Char]
clean = [Char] -> [Char]
forall a. HasCallStack => a
undefined
mustContain :: ParsecT s u m (Elem' a) -> Int -> ParsecT s u m b -> ParsecT s u m (Elem' a)
mustContain :: forall s u (m :: * -> *) a b.
ParsecT s u m (Elem' a)
-> Int -> ParsecT s u m b -> ParsecT s u m (Elem' a)
mustContain ParsecT s u m (Elem' a)
e Int
count ParsecT s u m b
pat = do
Elem' a
out <- ParsecT s u m (Elem' a)
e
case Parsec [Char] () (Maybe [[Char]])
-> [Char] -> [Char] -> Either ParseError (Maybe [[Char]])
forall s t a.
Stream s Identity t =>
Parsec s () a -> [Char] -> s -> Either ParseError a
parse (ParsecT [Char] () Identity [Char]
-> Parsec [Char] () (Maybe [[Char]])
forall s (m :: * -> *) u a.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m (Maybe [a])
findNaive (ParsecT [Char] () Identity [Char]
-> Parsec [Char] () (Maybe [[Char]]))
-> ParsecT [Char] () Identity [Char]
-> Parsec [Char] () (Maybe [[Char]])
forall a b. (a -> b) -> a -> b
$ [Char] -> ParsecT [Char] () Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
"Search") [Char]
"" (Elem' a -> [Char]
forall b. Elem' b -> [Char]
forall (a :: * -> *) b. ElementRep a => a b -> [Char]
innerText' Elem' a
out) of
Right (Just [[Char]]
xs) -> if Int
count Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> ([[Char]] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [[Char]]
xs) then ParsecT s u m (Elem' a)
forall s u (m :: * -> *) a. ParsecT s u m a
parserZero else Elem' a -> ParsecT s u m (Elem' a)
forall a. a -> ParsecT s u m a
forall (m :: * -> *) a. Monad m => a -> m a
return Elem' a
out
Either ParseError (Maybe [[Char]])
_ -> ParsecT s u m (Elem' a)
forall s u (m :: * -> *) a. ParsecT s u m a
parserZero
type Shell = (Elem, [(String, Maybe String)])
contains'' :: (Stream s m Char, ShowHTML a) => Shell
-> ParsecT s u m a
-> ParsecT s u m [a]
contains'' :: forall s (m :: * -> *) a u.
(Stream s m Char, ShowHTML a) =>
Shell -> ParsecT s u m a -> ParsecT s u m [a]
contains'' ([Char]
e,[([Char], Maybe [Char])]
as) ParsecT s u m a
p = Elem' a -> [a]
forall b. Elem' b -> [b]
forall (a :: * -> *) b. ElementRep a => a b -> [b]
matches' (Elem' a -> [a]) -> ParsecT s u m (Elem' a) -> ParsecT s u m [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe [[Char]]
-> Maybe (ParsecT s u m a)
-> [([Char], Maybe [Char])]
-> ParsecT s u m (Elem' a)
forall a s (m :: * -> *) u.
(ShowHTML a, Stream s m Char) =>
Maybe [[Char]]
-> Maybe (ParsecT s u m a)
-> [([Char], Maybe [Char])]
-> ParsecT s u m (Elem' a)
elemParser ([[Char]] -> Maybe [[Char]]
forall a. a -> Maybe a
Just [[Char]
e]) (ParsecT s u m a -> Maybe (ParsecT s u m a)
forall a. a -> Maybe a
Just ParsecT s u m a
p) [([Char], Maybe [Char])]
as
parseInShell :: ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m b
parseInShell = ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m b
forall s u (m :: * -> *) a b.
ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m b
contains
{-# DEPRECATED contains "this should have been called parseInShell from the start, you probably want contains' or containsFirst" #-}
contains :: ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m b
contains :: forall s u (m :: * -> *) a b.
ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m b
contains ParsecT s u m (Elem' a)
shell ParsecT [Char] () Identity b
b = do
Elem' a
x <- ParsecT s u m (Elem' a)
shell
let
ridNL :: ParsecT s u m b -> ParsecT s u m b
ridNL ParsecT s u m b
p = (ParsecT s u m Char -> ParsecT s u m [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
' ' ParsecT s u m Char -> ParsecT s u m Char -> ParsecT s u m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\n')) ParsecT s u m [Char] -> ParsecT s u m b -> ParsecT s u m b
forall a b. ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT s u m b
p
case ParsecT [Char] () Identity b
-> [Char] -> [Char] -> Either ParseError b
forall s t a.
Stream s Identity t =>
Parsec s () a -> [Char] -> s -> Either ParseError a
parse (ParsecT [Char] () Identity b -> ParsecT [Char] () Identity b
forall {s} {m :: * -> *} {u} {b}.
Stream s m Char =>
ParsecT s u m b -> ParsecT s u m b
ridNL ParsecT [Char] () Identity b
b) [Char]
"" (Elem' a -> [Char]
forall b. Elem' b -> [Char]
forall (a :: * -> *) b. ElementRep a => a b -> [Char]
innerText' Elem' a
x) of
Right b
match -> b -> ParsecT s u m b
forall a. a -> ParsecT s u m a
forall (m :: * -> *) a. Monad m => a -> m a
return b
match
Left ParseError
err -> [Char] -> ParsecT s u m b
forall s u (m :: * -> *) a. [Char] -> ParsecT s u m a
parserFail (ParseError -> [Char]
forall a. Show a => a -> [Char]
show ParseError
err)
containsMany, contains'
:: ShowHTML a =>
ParsecT s u m (Elem' a)
-> ParsecT String () Identity b
-> ParsecT s u m [b]
containsMany :: forall a s u (m :: * -> *) b.
ShowHTML a =>
ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m [b]
containsMany = ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m [b]
forall a s u (m :: * -> *) b.
ShowHTML a =>
ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m [b]
contains'
contains' :: forall a s u (m :: * -> *) b.
ShowHTML a =>
ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m [b]
contains' ParsecT s u m (Elem' a)
shell ParsecT [Char] () Identity b
b = do
Elem' a
x <- ParsecT s u m (Elem' a)
shell
case Parsec [Char] () (Maybe [b])
-> [Char] -> [Char] -> Either ParseError (Maybe [b])
forall s t a.
Stream s Identity t =>
Parsec s () a -> [Char] -> s -> Either ParseError a
parse (ParsecT [Char] () Identity b -> Parsec [Char] () (Maybe [b])
forall s (m :: * -> *) u a.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m (Maybe [a])
findNaive ParsecT [Char] () Identity b
b) [Char]
"" (Elem' a -> [Char]
forall b. Elem' b -> [Char]
forall (a :: * -> *) b. ElementRep a => a b -> [Char]
innerText' Elem' a
x) of
Right (Just [b]
matches) -> [b] -> ParsecT s u m [b]
forall a. a -> ParsecT s u m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [b]
matches
Left ParseError
err -> [Char] -> ParsecT s u m [b]
forall s u (m :: * -> *) a. [Char] -> ParsecT s u m a
parserFail (ParseError -> [Char]
forall a. Show a => a -> [Char]
show ParseError
err)
Right Maybe [b]
Nothing -> [Char] -> ParsecT s u m [b]
forall s u (m :: * -> *) a. [Char] -> ParsecT s u m a
parserFail [Char]
"no matches in this container"
containsFirst :: ShowHTML a =>
ParsecT s u m (Elem' a)
-> ParsecT String () Identity b
-> ParsecT s u m b
containsFirst :: forall a s u (m :: * -> *) b.
ShowHTML a =>
ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m b
containsFirst ParsecT s u m (Elem' a)
shell ParsecT [Char] () Identity b
b = [b] -> b
forall a. HasCallStack => [a] -> a
head ([b] -> b) -> ParsecT s u m [b] -> ParsecT s u m b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m [b]
forall a s u (m :: * -> *) b.
ShowHTML a =>
ParsecT s u m (Elem' a)
-> ParsecT [Char] () Identity b -> ParsecT s u m [b]
contains' ParsecT s u m (Elem' a)
shell ParsecT [Char] () Identity b
b
sequenceHtml :: Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
sequenceHtml :: forall s (m :: * -> *) u a b.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
sequenceHtml ParsecT s u m a
p1 ParsecT s u m b
p2 = do
a
x <- ParsecT s u m a
p1
[Char]
_ <- ParsecT s u m Char -> ParsecT s u m [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
' ' ParsecT s u m Char -> ParsecT s u m Char -> ParsecT s u m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\n' ParsecT s u m Char -> ParsecT s u m Char -> ParsecT s u m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\t')
b
y <- ParsecT s u m b
p2
(a, b) -> ParsecT s u m (a, b)
forall a. a -> ParsecT s u m a
forall (m :: * -> *) a. Monad m => a -> m a
return (a
x, b
y)
sequenceHtml_ :: Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
sequenceHtml_ :: forall s (m :: * -> *) u a b.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
sequenceHtml_ ParsecT s u m a
p1 ParsecT s u m b
p2 = do
a
_ <- ParsecT s u m a
p1
[Char]
_ <- ParsecT s u m Char -> ParsecT s u m [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
' ' ParsecT s u m Char -> ParsecT s u m Char -> ParsecT s u m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\n' ParsecT s u m Char -> ParsecT s u m Char -> ParsecT s u m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\t')
ParsecT s u m b
p2
(</>>) :: Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
</>> :: forall s (m :: * -> *) u a b.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
(</>>) = ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
forall s (m :: * -> *) u a b.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
sequenceHtml_
(</>>=) :: Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
</>>= :: forall s (m :: * -> *) u a b.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
(</>>=) = ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
forall s (m :: * -> *) u a b.
Stream s m Char =>
ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
sequenceHtml