| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
Text.Html.IsLink
Contents
- isLinkAttr :: String -> String -> Bool
- allLinkAttrs :: HashSet (String, String)
Documentation
isLinkAttr :: String -> String -> Bool Source
`isLinkAttr tag attr` returns `True` if the attribute `attr` of an HTML
element with tag name `tag` points to an external resource, and `False`
otherwise. For example, `isLinkAttr "a" "href"` returns `True`, whereas
`isLinkAttr "a" "class"` returns `False`. Note that `isLinkAttr`
expects both `tag` and `attr` to be in lowercase, so for example
`isLinkAttr "A" "HREF"` returns `False`.
allLinkAttrs :: HashSet (String, String) Source
A HashSet that contains all combinations of tag names and attributes
that correspond to links.
Example with HXT
Here's an example illustrating how to use isLinkAttr with hxt in
order to extract all links from an HTML document:
{-# LANGUAGE Arrows #-}
import Text.Html.IsLink
import Text.XML.HXT.Core
-- | Collect every link found in the HTML document at @path@.
--
-- Each element of the result is a tuple containing the tag name, the
-- attribute name and the attribute value of one link.
getAllLinks :: FilePath -> IO [(String, String, String)]
getAllLinks path = runX $ doc >>> multi getLink
  where
    -- Read the input with HXT's HTML parsing option switched on and
    -- warnings switched off.
    doc = readDocument [withParseHTML yes, withWarnings no] path

-- | Arrow producing a @(tag name, attribute name, attribute value)@ triple
-- for each attribute of the input node that 'isLinkAttr' classifies as a
-- link; triples for all other attributes are dropped.
getLink :: ArrowXml a => a XmlTree (String, String, String)
getLink = proc node -> do
    tag <- getName -< node
    -- One result per attribute of the node.
    attrbNode <- getAttrl -< node
    attrb <- getName -< attrbNode
    -- Render the attribute node's children as a string to obtain its value.
    val <- xshow getChildren -< attrbNode
    isLinkA -< (tag, attrb, val)
  where
    -- Let a triple through unchanged only when the predicate holds.
    isLinkA = isLink `guardsP` this
    isLink (tag, attrb, _) = isLinkAttr tag attrb