scrappy-core-0.1.0.1: html pattern matching library and high-level interface concurrent requests lib for webscraping
Safe Haskell: None
Language: Haskell2010

Scrappy.Elem.Types

Synopsis

Documentation

class ShowHTML a where Source #

Methods

showH :: a -> String Source #

Instances

Instances details
ShowHTML Paragraph Source # 
Instance details

Defined in Scrappy.Elem.ITextElemParser

ShowHTML Sentence Source # 
Instance details

Defined in Scrappy.Elem.ITextElemParser

ShowHTML Text Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showH :: Text -> String Source #

ShowHTML Char Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showH :: Char -> String Source #

ShowHTML a => ShowHTML (Elem' a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showH :: Elem' a -> String Source #

ShowHTML a => ShowHTML (TreeHTML a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showH :: TreeHTML a -> String Source #

Show a => ShowHTML [a] Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showH :: [a] -> String Source #

class ElementRep (a :: Type -> Type) where Source #

Methods

elTag :: a b -> Elem Source #

attrs :: a b -> Attrs Source #

innerText' :: a b -> String Source #

matches' :: a b -> [b] Source #

Instances

Instances details
ElementRep Elem' Source # 
Instance details

Defined in Scrappy.Elem.Types

ElementRep TreeHTML Source # 
Instance details

Defined in Scrappy.Elem.Types

class (ShowHTML c, ElementRep a) => InnerHTMLRep (a :: Type -> Type) (b :: Type -> Type) c | a c -> b c where Source #

Methods

foldHtmlMatcher :: [HTMLMatcher a c] -> b c Source #

innerText :: b c -> String Source #

matches :: b c -> [c] Source #

Instances

Instances details
ShowHTML c => InnerHTMLRep Elem' InnerTextResult c Source # 
Instance details

Defined in Scrappy.Elem.Types

ShowHTML c => InnerHTMLRep TreeHTML InnerTextHTMLTree c Source #

Tree

func :: Show a => [HTMLMatcher a] -> InnerTextHTMLTree a -> InnerTextHTMLTree a
func [] state = state
func (htmlM:htmlMatchers) (InnerTextHTMLTree{..}) = case htmlM of
  IText str -> func htmlMatchers (InnerTextHTMLTree matches (innerText <> str) innerTree)
  -- | May need to enforce a Show Instance on mat
  Match mat -> func htmlMatchers (InnerTextHTMLTree (mat : matches) (innerText <> (show mat)) innerTree)
  --concat to fullInnerText
  Element htmlTree ->
    --interEl :: ElemHead [HtmlMatcher]
    func htmlMatchers (InnerTextHTMLTree (matches <> matches' htmlTree) (innerText <> treeElemToStr htmlTree) ((makeBranch htmlTree) : innerTree))

SimpleElem

func :: (ShowHTML a, ElementRep e) => [HTMLMatcher e a] -> InnerTextResult a
func state [] = state
func InnerTextResult{..} (next:inners) = case next of
  IText str -> func (InnerTextResult matches (innerText <> str)) inners
  -- | May need to enforce a Show Instance on mat
  Match mat -> func (InnerTextResult (mat : matches) (innerText <> (showH mat))) inners
  --concat to fullInnerText
  Element elem ->
    --interEl :: ElemHead [HtmlMatcher]
    func (InnerTextResult (matches' elem <> matches) (innerText <> (innerText' elem))) inners

Instance details

Defined in Scrappy.Elem.Types

noPat :: forall s u (m :: Type -> Type). Maybe (ParsecT s u m String) Source #

coerceAttrs :: Attrs -> [(String, Maybe String)] Source #

Parser is configured via the return type but gives the input type

data TreeHTML a Source #

Note: this is the representation I'll be using.

Instances

Instances details
ElementRep TreeHTML Source # 
Instance details

Defined in Scrappy.Elem.Types

ShowHTML c => InnerHTMLRep TreeHTML InnerTextHTMLTree c Source #

Tree

func :: Show a => [HTMLMatcher a] -> InnerTextHTMLTree a -> InnerTextHTMLTree a
func [] state = state
func (htmlM:htmlMatchers) (InnerTextHTMLTree{..}) = case htmlM of
  IText str -> func htmlMatchers (InnerTextHTMLTree matches (innerText <> str) innerTree)
  -- | May need to enforce a Show Instance on mat
  Match mat -> func htmlMatchers (InnerTextHTMLTree (mat : matches) (innerText <> (show mat)) innerTree)
  --concat to fullInnerText
  Element htmlTree ->
    --interEl :: ElemHead [HtmlMatcher]
    func htmlMatchers (InnerTextHTMLTree (matches <> matches' htmlTree) (innerText <> treeElemToStr htmlTree) ((makeBranch htmlTree) : innerTree))

SimpleElem

func :: (ShowHTML a, ElementRep e) => [HTMLMatcher e a] -> InnerTextResult a
func state [] = state
func InnerTextResult{..} (next:inners) = case next of
  IText str -> func (InnerTextResult matches (innerText <> str)) inners
  -- | May need to enforce a Show Instance on mat
  Match mat -> func (InnerTextResult (mat : matches) (innerText <> (showH mat))) inners
  --concat to fullInnerText
  Element elem ->
    --interEl :: ElemHead [HtmlMatcher]
    func (InnerTextResult (matches' elem <> matches) (innerText <> (innerText' elem))) inners

Instance details

Defined in Scrappy.Elem.Types

Show a => Show (TreeHTML a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showsPrec :: Int -> TreeHTML a -> ShowS #

show :: TreeHTML a -> String #

showList :: [TreeHTML a] -> ShowS #

ShowHTML a => ShowHTML (TreeHTML a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showH :: TreeHTML a -> String Source #

data InnerTextHTMLTree a Source #

Instances

Instances details
ShowHTML c => InnerHTMLRep TreeHTML InnerTextHTMLTree c Source #

Tree

func :: Show a => [HTMLMatcher a] -> InnerTextHTMLTree a -> InnerTextHTMLTree a
func [] state = state
func (htmlM:htmlMatchers) (InnerTextHTMLTree{..}) = case htmlM of
  IText str -> func htmlMatchers (InnerTextHTMLTree matches (innerText <> str) innerTree)
  -- | May need to enforce a Show Instance on mat
  Match mat -> func htmlMatchers (InnerTextHTMLTree (mat : matches) (innerText <> (show mat)) innerTree)
  --concat to fullInnerText
  Element htmlTree ->
    --interEl :: ElemHead [HtmlMatcher]
    func htmlMatchers (InnerTextHTMLTree (matches <> matches' htmlTree) (innerText <> treeElemToStr htmlTree) ((makeBranch htmlTree) : innerTree))

SimpleElem

func :: (ShowHTML a, ElementRep e) => [HTMLMatcher e a] -> InnerTextResult a
func state [] = state
func InnerTextResult{..} (next:inners) = case next of
  IText str -> func (InnerTextResult matches (innerText <> str)) inners
  -- | May need to enforce a Show Instance on mat
  Match mat -> func (InnerTextResult (mat : matches) (innerText <> (showH mat))) inners
  --concat to fullInnerText
  Element elem ->
    --interEl :: ElemHead [HtmlMatcher]
    func (InnerTextResult (matches' elem <> matches) (innerText <> (innerText' elem))) inners

Instance details

Defined in Scrappy.Elem.Types

Monoid (InnerTextHTMLTree a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Semigroup (InnerTextHTMLTree a) Source # 
Instance details

Defined in Scrappy.Elem.Types

data Elem' a Source #

node-like

Constructors

Elem' 

Instances

Instances details
ElementRep Elem' Source # 
Instance details

Defined in Scrappy.Elem.Types

ShowHTML c => InnerHTMLRep Elem' InnerTextResult c Source # 
Instance details

Defined in Scrappy.Elem.Types

Show a => Show (Elem' a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showsPrec :: Int -> Elem' a -> ShowS #

show :: Elem' a -> String #

showList :: [Elem' a] -> ShowS #

ShowHTML a => ShowHTML (Elem' a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showH :: Elem' a -> String Source #

type HMatcher' (a :: Type -> Type) (b :: Type -> Type) c = [HTMLMatcher b c] -> a c Source #

data HTMLMatcher (a :: Type -> Type) b Source #

Constructors

IText String 
Element (a b) 
Match b 

Instances

Instances details
(Show b, Show (a b)) => Show (HTMLMatcher a b) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showsPrec :: Int -> HTMLMatcher a b -> ShowS #

show :: HTMLMatcher a b -> String #

showList :: [HTMLMatcher a b] -> ShowS #

data HTMLBare (e :: Type -> Type) a Source #

Constructors

HTMLBare 

Fields

data AttrsError Source #

Constructors

IncorrectAttrs 

Instances

Instances details
Show AttrsError Source # 
Instance details

Defined in Scrappy.Elem.Types

data MessyTree a b Source #

Eventually we will be able to do an Eq2 comparison of trees whose parameters are (tag, attrs). This needs some kind of "flexible equality" match.

Would treeElemParser fail on cases like input with no end tag?

Constructors

Noise b 
Nodee a [MessyTree a b] 

data MessyTreeMatch a b c Source #

Constructors

Noise' a 
Match' b 
Node' c [MessyTreeMatch a b c] 

data GroupHtml (element :: Type -> Type) a Source #

Constructors

GroupHtml [element a] Glength MaxLength 

Instances

Instances details
(ElementRep e, Show (e a), Show a, ShowHTML a) => Show (GroupHtml e a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

showsPrec :: Int -> GroupHtml e a -> ShowS #

show :: GroupHtml e a -> String #

showList :: [GroupHtml e a] -> ShowS #

Eq (GroupHtml e a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

(==) :: GroupHtml e a -> GroupHtml e a -> Bool #

(/=) :: GroupHtml e a -> GroupHtml e a -> Bool #

Ord (GroupHtml e a) Source # 
Instance details

Defined in Scrappy.Elem.Types

Methods

compare :: GroupHtml e a -> GroupHtml e a -> Ordering #

(<) :: GroupHtml e a -> GroupHtml e a -> Bool #

(<=) :: GroupHtml e a -> GroupHtml e a -> Bool #

(>) :: GroupHtml e a -> GroupHtml e a -> Bool #

(>=) :: GroupHtml e a -> GroupHtml e a -> Bool #

max :: GroupHtml e a -> GroupHtml e a -> GroupHtml e a #

min :: GroupHtml e a -> GroupHtml e a -> GroupHtml e a #

ungroup :: ElementRep e => GroupHtml e a -> [e a] Source #

mkGH :: ElementRep e => [e a] -> GroupHtml e a Source #

maxLength :: [[a]] -> [a] Source #

biggestHtmlGroup :: forall (e :: Type -> Type) a. [GroupHtml e a] -> GroupHtml e a Source #

biggestGroup :: forall (e :: Type -> Type) a. ElementRep e => [GroupHtml e a] -> GroupHtml e a Source #

selfClosingElems :: [String] Source #

List of HTML self-closing (void) elements

foldFuncTup :: forall (e :: Type -> Type) a. (ShowHTML (e a), ShowHTML a, ElementRep e) => HTMLMatcher e a -> (String, [a]) -> (String, [a]) Source #

Bug found: matches start out in the right order but then get reversed.

data Clickable Source #

In our failed test case with the command : parse f "" "a/div/a" where f :: (Stream s m Char) => ParsecT s u m (TreeHTML String); f = treeElemParser (Just ["a"]) Nothing []

We can tell that foldFuncTup has been called twice (we believe).

We will test how an element named "div" inside an "a" element would behave.

TODO(galen): As we advance scrappy we need to be more realistic about what a clickable is, since here we really have a LinkEl rather than a button (which is clickable but doesn't fit here), and it can emit a side effect such as that of a LinkEl or some other event, like those we can handle with lazy-js.

Constructors

Clickable ElemHead Link 

Instances

Instances details
Show Clickable Source # 
Instance details

Defined in Scrappy.Elem.Types

Eq Clickable Source # 
Instance details

Defined in Scrappy.Elem.Types

mkClickableEH :: Bool -> LastUrl -> ElemHead -> Maybe Clickable Source #

In the future this could definitely be expanded upon for our JS interface; right now it only works for links and wouldn't literally click a button.

mkClickable :: ElemHead -> Elem' a -> Maybe Clickable
mkClickable eHead emnt = do
  href <- getHref emnt
  pure $ Clickable eHead href

fst' :: (a, b, c) -> a Source #

snd' :: (a, b, c) -> b Source #

thd' :: (a, b, c) -> c Source #

endTag :: forall s (m :: Type -> Type) u. Stream s m Char => String -> ParsecT s u m String Source #

enoughMatches :: forall a s u (m :: Type -> Type). Int -> String -> Map String String -> (String, [a]) -> ParsecT s u m (Elem' a) Source #

enoughMatchesTree :: forall a s u (m :: Type -> Type). Int -> String -> Map String String -> (String, [a], Forest ElemHead) -> ParsecT s u m (TreeHTML a) Source #

selfClosingTextful :: forall a s (m :: Type -> Type) u (e :: Type -> Type). (ShowHTML a, Stream s m Char) => Maybe (ParsecT s u m a) -> ParsecT s u m [HTMLMatcher e a] Source #

Explanation: this is for the edge case of p tags, which are allowed to "contain" text without actually having an end tag. If I recall correctly, self-closing tags don't allow embedded elements, only plain text. This means that the text belonging to the tag being read is everything up until the next HTML control section.

data UrlPagination Source #

Instances

Instances details
Show UrlPagination Source # 
Instance details

Defined in Scrappy.Elem.Types

Eq UrlPagination Source # 
Instance details

Defined in Scrappy.Elem.Types