| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
Scrappy.Elem.ChainHTML
Synopsis
- nl :: forall s (m :: Type -> Type) u. Stream s m Char => ParsecT s u m ()
- manyHtml :: forall {s} {m :: Type -> Type} {u} {a}. Stream s m Char => ParsecT s u m a -> ParsecT s u m [a]
- someHtml :: forall {s} {m :: Type -> Type} {u} {a}. Stream s m Char => ParsecT s u m a -> ParsecT s u m [a]
- manyTillHtml_ :: forall {s} {m :: Type -> Type} {u} {a} {end}. Stream s m Char => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
- htmlTag :: forall s (m :: Type -> Type) u. Stream s m Char => ParsecT s u m ElemHead
- manyTill_ :: forall s u (m :: Type -> Type) a end. ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end)
- clean :: String -> String
- mustContain :: forall s u (m :: Type -> Type) a b. ParsecT s u m (Elem' a) -> Int -> ParsecT s u m b -> ParsecT s u m (Elem' a)
- type Shell = (Elem, [(String, Maybe String)])
- contains'' :: forall s (m :: Type -> Type) a u. (Stream s m Char, ShowHTML a) => Shell -> ParsecT s u m a -> ParsecT s u m [a]
- parseInShell :: forall {s} {u} {m :: Type -> Type} {a} {b}. ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m b
- contains :: forall s u (m :: Type -> Type) a b. ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m b
- containsMany :: forall a s u (m :: Type -> Type) b. ShowHTML a => ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m [b]
- contains' :: forall a s u (m :: Type -> Type) b. ShowHTML a => ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m [b]
- containsFirst :: forall a s u (m :: Type -> Type) b. ShowHTML a => ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m b
- sequenceHtml :: forall s (m :: Type -> Type) u a b. Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
- sequenceHtml_ :: forall s (m :: Type -> Type) u a b. Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
- (</>>) :: forall s (m :: Type -> Type) u a b. Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b
- (</>>=) :: forall s (m :: Type -> Type) u a b. Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b)
Documentation
manyHtml :: forall {s} {m :: Type -> Type} {u} {a}. Stream s m Char => ParsecT s u m a -> ParsecT s u m [a] Source #
someHtml :: forall {s} {m :: Type -> Type} {u} {a}. Stream s m Char => ParsecT s u m a -> ParsecT s u m [a] Source #
manyTillHtml_ :: forall {s} {m :: Type -> Type} {u} {a} {end}. Stream s m Char => ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end) Source #
manyTill_ :: forall s u (m :: Type -> Type) a end. ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m ([a], end) Source #
mustContain :: forall s u (m :: Type -> Type) a b. ParsecT s u m (Elem' a) -> Int -> ParsecT s u m b -> ParsecT s u m (Elem' a) Source #
type Shell = (Elem, [(String, Maybe String)]) Source #
An elem head configures the bracketing, so this is all we need for crafting
contains'' :: forall s (m :: Type -> Type) a u. (Stream s m Char, ShowHTML a) => Shell -> ParsecT s u m a -> ParsecT s u m [a] Source #
parseInShell :: forall {s} {u} {m :: Type -> Type} {a} {b}. ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m b Source #
contains :: forall s u (m :: Type -> Type) a b. ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m b Source #
Deprecated: this should have been called parseInShell from the start; you probably want contains' or containsFirst.
This will be fully removed in the future. 99% of the time you will want to pair this with findNaive.
containsMany :: forall a s u (m :: Type -> Type) b. ShowHTML a => ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m [b] Source #
Finds multiple matches anywhere inside the passed elem.
This function is also quite extensible: when used with scrape,
this combination returns a list of lists of elems in which the hierarchy of the HTML has been preserved
but a great deal of information has been filtered out. An example use case would be knowing that
you want p tags from a set of very specific shells; this would let you analyze which matches, and how many,
came from each shell.
This also naturally extends to running the same scraper on multiple pages, which would allow you to recover
ample detail on the number of match_A in shell_S on Page_P ~~ [[[MatchA]]], and this Match can be any
arbitrarily defined type. You can further imagine pairing this with NLP analysis, but this is a long enough point here.
contains' :: forall a s u (m :: Type -> Type) b. ShowHTML a => ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m [b] Source #
Finds multiple matches anywhere inside the passed elem.
This function is also quite extensible: when used with scrape,
this combination returns a list of lists of elems in which the hierarchy of the HTML has been preserved
but a great deal of information has been filtered out. An example use case would be knowing that
you want p tags from a set of very specific shells; this would let you analyze which matches, and how many,
came from each shell.
This also naturally extends to running the same scraper on multiple pages, which would allow you to recover
ample detail on the number of match_A in shell_S on Page_P ~~ [[[MatchA]]], and this Match can be any
arbitrarily defined type. You can further imagine pairing this with NLP analysis, but this is a long enough point here.
containsFirst :: forall a s u (m :: Type -> Type) b. ShowHTML a => ParsecT s u m (Elem' a) -> ParsecT String () Identity b -> ParsecT s u m b Source #
sequenceHtml :: forall s (m :: Type -> Type) u a b. Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m (a, b) Source #
sequenceHtml_ :: forall s (m :: Type -> Type) u a b. Stream s m Char => ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m b Source #