{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
module Scrappy.Links where
import Control.Monad (join)
import qualified Network.URI as NURI
import Text.URI (URI, uriQuery, mkURI, uriPath, unRText, emptyURI, uriScheme, uriAuthority, RTextLabel(..))
import Control.Lens ((^.))
import qualified Text.URI.Lens as UL
import Text.Parsec (ParsecT, Stream )
import Data.Functor.Classes (eq1)
import Data.Map (Map)
import Data.Either (fromRight, isRight)
import Data.Maybe (catMaybes, fromJust, fromMaybe)
import Data.List (isSuffixOf, isInfixOf, isPrefixOf)
import qualified Data.List.NonEmpty as NE (length, last)
import Data.Text (Text, pack, unpack, splitOn
)
import Data.Char (toLower)
import Data.Aeson.TH (defaultOptions, deriveJSON)
type PageNumber = Int
type BaseUrl = Link
type Url = String
type HrefURI = String
type CurrentUrl = Url
type DOI = String
type Src = Url
type RelativeUrl = Url
fixRelativeUrl :: BaseUrl -> Url -> Url
fixRelativeUrl :: Link -> Url -> Url
fixRelativeUrl (Link Url
bUrl) Url
url
| Url
url Url -> Url -> Bool
forall a. Eq a => a -> a -> Bool
== Url
"" = Url
bUrl
| Url
url Url -> Url -> Bool
forall a. Eq a => a -> a -> Bool
== Url
"/" = Url
bUrl
| Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isInfixOf Url
bUrl Url
url = Url
url
| Url -> Char
forall a. HasCallStack => [a] -> a
last Url
bUrl Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
&& (Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf Url
"/" Url
url) = Url
bUrl Url -> Url -> Url
forall a. Semigroup a => a -> a -> a
<> (Url -> Url
forall a. HasCallStack => [a] -> [a]
tail Url
url)
| Url -> Char
forall a. HasCallStack => [a] -> a
last Url
bUrl Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
&& (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf Url
"/" Url
url) = Url
bUrl Url -> Url -> Url
forall a. Semigroup a => a -> a -> a
<> Url
url
| Url -> Char
forall a. HasCallStack => [a] -> a
last Url
bUrl Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/' Bool -> Bool -> Bool
&& (Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf Url
"/" Url
url) = Url
bUrl Url -> Url -> Url
forall a. Semigroup a => a -> a -> a
<> Url
url
| Url -> Char
forall a. HasCallStack => [a] -> a
last Url
bUrl Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/' Bool -> Bool -> Bool
&& (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf Url
"/" Url
url) = Url
bUrl Url -> Url -> Url
forall a. Semigroup a => a -> a -> a
<> Url
"/" Url -> Url -> Url
forall a. Semigroup a => a -> a -> a
<> Url
url
getHtmlStateful :: Url -> String
getHtmlStateful :: Url -> Url
getHtmlStateful = Url -> Url
forall a. HasCallStack => a
undefined
type LastUrl = Link
type Href = String
fixSameSiteURL :: LastUrl -> Href -> Maybe Url
fixSameSiteURL :: Link -> Url -> Maybe Url
fixSameSiteURL Link
lastUrl Url
href = Maybe Url
forall a. HasCallStack => a
undefined
fixURL :: LastUrl -> Href -> Url
fixURL :: Link -> Url -> Url
fixURL Link
previous Url
href =
let
base :: Link
base = if Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isPrefixOf Url
"/" Url
href then Maybe Link -> Link
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Link -> Link) -> Maybe Link -> Link
forall a b. (a -> b) -> a -> b
$ Link -> Maybe Link
deriveBaseUrl Link
previous else Link
previous
hrefURI :: Maybe URI
hrefURI = Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Text -> Maybe URI) -> (Url -> Text) -> Url -> Maybe URI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Text
pack (Url -> Maybe URI) -> Url -> Maybe URI
forall a b. (a -> b) -> a -> b
$ Url
href
in
case Maybe (Maybe (RText 'Scheme)) -> Maybe (RText 'Scheme)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Maybe (Maybe (RText 'Scheme)) -> Maybe (RText 'Scheme))
-> Maybe (Maybe (RText 'Scheme)) -> Maybe (RText 'Scheme)
forall a b. (a -> b) -> a -> b
$ URI -> Maybe (RText 'Scheme)
uriScheme (URI -> Maybe (RText 'Scheme))
-> Maybe URI -> Maybe (Maybe (RText 'Scheme))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe URI
hrefURI of
Just RText 'Scheme
_ -> Url
href
Maybe (RText 'Scheme)
Nothing -> Link -> Url -> Url
fixRelativeUrl Link
base Url
href
deriveBaseUrl :: Link -> Maybe BaseUrl
deriveBaseUrl :: Link -> Maybe Link
deriveBaseUrl (Link Url
url) = URI -> Maybe Link
mkBaseUrl (URI -> Maybe Link) -> Maybe URI -> Maybe Link
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Text -> Maybe URI) -> (Url -> Text) -> Url -> Maybe URI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Text
pack (Url -> Maybe URI) -> Url -> Maybe URI
forall a b. (a -> b) -> a -> b
$ Url
url)
mkBaseUrl :: URI -> Maybe Link
mkBaseUrl :: URI -> Maybe Link
mkBaseUrl URI
uri = do
Text
scheme <- (RText 'Scheme -> Text) -> Maybe (RText 'Scheme) -> Maybe Text
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap RText 'Scheme -> Text
forall (l :: RTextLabel). RText l -> Text
unRText (Maybe (RText 'Scheme) -> Maybe Text)
-> Maybe (RText 'Scheme) -> Maybe Text
forall a b. (a -> b) -> a -> b
$ URI
uri URI
-> Getting (Maybe (RText 'Scheme)) URI (Maybe (RText 'Scheme))
-> Maybe (RText 'Scheme)
forall s a. s -> Getting a s a -> a
^. Getting (Maybe (RText 'Scheme)) URI (Maybe (RText 'Scheme))
Lens' URI (Maybe (RText 'Scheme))
UL.uriScheme
Text
host <- case URI
uri URI
-> Getting (Either Bool Authority) URI (Either Bool Authority)
-> Either Bool Authority
forall s a. s -> Getting a s a -> a
^. Getting (Either Bool Authority) URI (Either Bool Authority)
Lens' URI (Either Bool Authority)
UL.uriAuthority of
Right Authority
author -> Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ RText 'Host -> Text
forall (l :: RTextLabel). RText l -> Text
unRText (RText 'Host -> Text) -> RText 'Host -> Text
forall a b. (a -> b) -> a -> b
$ Authority
author Authority
-> Getting (RText 'Host) Authority (RText 'Host) -> RText 'Host
forall s a. s -> Getting a s a -> a
^. Getting (RText 'Host) Authority (RText 'Host)
Lens' Authority (RText 'Host)
UL.authHost
Left Bool
_ -> Maybe Text
forall a. Maybe a
Nothing
Link -> Maybe Link
forall a. a -> Maybe a
Just (Link -> Maybe Link) -> (Url -> Link) -> Url -> Maybe Link
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Link
Link (Url -> Maybe Link) -> Url -> Maybe Link
forall a b. (a -> b) -> a -> b
$ (Text -> Url
unpack Text
scheme) Url -> Url -> Url
forall a. Semigroup a => a -> a -> a
<> (Url
"://") Url -> Url -> Url
forall a. Semigroup a => a -> a -> a
<> (Text -> Url
unpack Text
host)
class IsLink a where
renderLink :: a -> Url
getFileName :: Link -> Maybe String
getFileName :: Link -> Maybe Url
getFileName = Link -> Maybe Url
getLastPath
doiParser :: ParsecT s u m DOI
doiParser :: forall s u (m :: * -> *). ParsecT s u m Url
doiParser = ParsecT s u m Url
forall a. HasCallStack => a
undefined
data ReferenceSys = RefSys [String] [String]
type GeneratedLink = String
type Namespace = Text
type Option = Text
data QParams = Opt (Map Namespace [Option]) | SimpleKV (Text, Text)
type SiteTree = [(Bool, Text)]
data DOMLink = Href' Href
| Src Url
| PlainLink Url
newtype Link = Link Url deriving (Link -> Link -> Bool
(Link -> Link -> Bool) -> (Link -> Link -> Bool) -> Eq Link
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Link -> Link -> Bool
== :: Link -> Link -> Bool
$c/= :: Link -> Link -> Bool
/= :: Link -> Link -> Bool
Eq, Int -> Link -> Url -> Url
[Link] -> Url -> Url
Link -> Url
(Int -> Link -> Url -> Url)
-> (Link -> Url) -> ([Link] -> Url -> Url) -> Show Link
forall a.
(Int -> a -> Url -> Url)
-> (a -> Url) -> ([a] -> Url -> Url) -> Show a
$cshowsPrec :: Int -> Link -> Url -> Url
showsPrec :: Int -> Link -> Url -> Url
$cshow :: Link -> Url
show :: Link -> Url
$cshowList :: [Link] -> Url -> Url
showList :: [Link] -> Url -> Url
Show, ReadPrec [Link]
ReadPrec Link
Int -> ReadS Link
ReadS [Link]
(Int -> ReadS Link)
-> ReadS [Link] -> ReadPrec Link -> ReadPrec [Link] -> Read Link
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS Link
readsPrec :: Int -> ReadS Link
$creadList :: ReadS [Link]
readList :: ReadS [Link]
$creadPrec :: ReadPrec Link
readPrec :: ReadPrec Link
$creadListPrec :: ReadPrec [Link]
readListPrec :: ReadPrec [Link]
Read, Eq Link
Eq Link =>
(Link -> Link -> Ordering)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Link)
-> (Link -> Link -> Link)
-> Ord Link
Link -> Link -> Bool
Link -> Link -> Ordering
Link -> Link -> Link
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Link -> Link -> Ordering
compare :: Link -> Link -> Ordering
$c< :: Link -> Link -> Bool
< :: Link -> Link -> Bool
$c<= :: Link -> Link -> Bool
<= :: Link -> Link -> Bool
$c> :: Link -> Link -> Bool
> :: Link -> Link -> Bool
$c>= :: Link -> Link -> Bool
>= :: Link -> Link -> Bool
$cmax :: Link -> Link -> Link
max :: Link -> Link -> Link
$cmin :: Link -> Link -> Link
min :: Link -> Link -> Link
Ord)
parseLink :: Bool -> Link -> Url -> Maybe Link
parseLink :: Bool -> Link -> Url -> Maybe Link
parseLink Bool
onlySameSite Link
lastLink Url
newLink =
case Url -> Bool
hasNoURIScheme Url
newLink of
Bool
True -> Link -> Maybe Link
forall a. a -> Maybe a
Just (Link -> Maybe Link) -> (Url -> Link) -> Url -> Maybe Link
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Link
Link (Url -> Maybe Link) -> Url -> Maybe Link
forall a b. (a -> b) -> a -> b
$ Link -> Url -> Url
fixRelativeUrl (Maybe Link -> Link
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Link -> Link) -> Maybe Link -> Link
forall a b. (a -> b) -> a -> b
$ Link -> Maybe Link
deriveBaseUrl Link
lastLink) Url
newLink
Bool
False -> case Url -> Bool
isHTTP Url
newLink of
Bool
False -> Maybe Link
forall a. Maybe a
Nothing
Bool
True -> case Bool
onlySameSite of
Bool
False -> Link -> Maybe Link
forall a. a -> Maybe a
Just (Link -> Maybe Link) -> (Url -> Link) -> Url -> Maybe Link
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Link
Link (Url -> Maybe Link) -> Url -> Maybe Link
forall a b. (a -> b) -> a -> b
$ Url
newLink
Bool
True -> case Url -> Link -> Bool
sameAuthority Url
newLink Link
lastLink of
Bool
False -> Maybe Link
forall a. Maybe a
Nothing
Bool
True -> Link -> Maybe Link
forall a. a -> Maybe a
Just (Link -> Maybe Link) -> (Url -> Link) -> Url -> Maybe Link
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Link
Link (Url -> Maybe Link) -> Url -> Maybe Link
forall a b. (a -> b) -> a -> b
$ Url
newLink
where
hasNoURIScheme :: Url -> Bool
hasNoURIScheme Url
url = (Maybe (Maybe (RText 'Scheme)) -> Maybe (RText 'Scheme)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Maybe (Maybe (RText 'Scheme)) -> Maybe (RText 'Scheme))
-> Maybe (Maybe (RText 'Scheme)) -> Maybe (RText 'Scheme)
forall a b. (a -> b) -> a -> b
$ (URI -> Maybe (RText 'Scheme))
-> Maybe URI -> Maybe (Maybe (RText 'Scheme))
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URI -> Maybe (RText 'Scheme)
uriScheme (Maybe URI -> Maybe (Maybe (RText 'Scheme)))
-> Maybe URI -> Maybe (Maybe (RText 'Scheme))
forall a b. (a -> b) -> a -> b
$ Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Text -> Maybe URI) -> (Url -> Text) -> Url -> Maybe URI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Text
pack (Url -> Maybe URI) -> Url -> Maybe URI
forall a b. (a -> b) -> a -> b
$ Url
url) Maybe (RText 'Scheme) -> Maybe (RText 'Scheme) -> Bool
forall a. Eq a => a -> a -> Bool
== Maybe (RText 'Scheme)
forall a. Maybe a
Nothing
isHTTP :: Url -> Bool
isHTTP Url
url = Url -> [Url] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem (Url -> Maybe Url -> Url
forall a. a -> Maybe a -> a
fromMaybe Url
"" ((URI -> Url) -> Maybe URI -> Maybe Url
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URI -> Url
NURI.uriScheme (Maybe URI -> Maybe Url) -> Maybe URI -> Maybe Url
forall a b. (a -> b) -> a -> b
$ Url -> Maybe URI
NURI.parseURI Url
url)) [Url
"https:", Url
"http:"]
sameAuthority :: Url -> Link -> Bool
sameAuthority :: Url -> Link -> Bool
sameAuthority Url
href (Link Url
linky) =
let
getMainAuthority :: Url -> Text
getMainAuthority = [Text] -> Text
forall a. HasCallStack => [a] -> a
last ([Text] -> Text) -> (Url -> [Text]) -> Url -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
splitOn Text
"." (Text -> [Text]) -> (Url -> Text) -> Url -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Url -> Text
pack
getRegName :: Url -> Maybe Text
getRegName Url
l = (URIAuth -> Text) -> Maybe URIAuth -> Maybe Text
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Url -> Text
getMainAuthority (Url -> Text) -> (URIAuth -> Url) -> URIAuth -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URIAuth -> Url
NURI.uriRegName) (Maybe URIAuth -> Maybe Text) -> Maybe URIAuth -> Maybe Text
forall a b. (a -> b) -> a -> b
$ URI -> Maybe URIAuth
NURI.uriAuthority (URI -> Maybe URIAuth) -> Maybe URI -> Maybe URIAuth
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Url -> Maybe URI
NURI.parseURI Url
l
in case Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) (Text -> Text -> Bool) -> Maybe Text -> Maybe (Text -> Bool)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Url -> Maybe Text
getRegName Url
href) Maybe (Text -> Bool) -> Maybe Text -> Maybe Bool
forall a b. Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Url -> Maybe Text
getRegName Url
linky) of
Maybe Bool
Nothing -> Bool
False
Just Bool
b -> Bool
b
type HostName = String
getHostName :: Link -> Maybe HostName
getHostName :: Link -> Maybe Url
getHostName (Link Url
url) = do
URI
uri <- Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Text -> Maybe URI) -> Text -> Maybe URI
forall a b. (a -> b) -> a -> b
$ Url -> Text
pack Url
url
case (Authority -> Url) -> Either Bool Authority -> Either Bool Url
forall a b. (a -> b) -> Either Bool a -> Either Bool b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> Url
unpack (Text -> Url) -> (Authority -> Text) -> Authority -> Url
forall b c a. (b -> c) -> (a -> b) -> a -> c
. RText 'Host -> Text
forall (l :: RTextLabel). RText l -> Text
unRText (RText 'Host -> Text)
-> (Authority -> RText 'Host) -> Authority -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Authority
-> Getting (RText 'Host) Authority (RText 'Host) -> RText 'Host
forall s a. s -> Getting a s a -> a
^. Getting (RText 'Host) Authority (RText 'Host)
Lens' Authority (RText 'Host)
UL.authHost)) (URI
uri URI
-> Getting (Either Bool Authority) URI (Either Bool Authority)
-> Either Bool Authority
forall s a. s -> Getting a s a -> a
^. Getting (Either Bool Authority) URI (Either Bool Authority)
Lens' URI (Either Bool Authority)
UL.uriAuthority) of
Right Url
hn -> Url -> Maybe Url
forall a. a -> Maybe a
Just Url
hn
Either Bool Url
_ -> Maybe Url
forall a. Maybe a
Nothing
instance IsLink Link where
renderLink :: Link -> Url
renderLink (Link Url
url) = Url
url
maybeUsefulNewUrl :: Link -> [(Link, a)] -> Link -> Maybe Link
maybeUsefulNewUrl :: forall a. Link -> [(Link, a)] -> Link -> Maybe Link
maybeUsefulNewUrl Link
baseUrl [(Link, a)]
tree Link
url = Link -> Link -> Maybe Link
maybeUsefulUrl Link
baseUrl Link
url Maybe Link -> (Link -> Maybe Link) -> Maybe Link
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [(Link, a)] -> Link -> Maybe Link
forall a. [(Link, a)] -> Link -> Maybe Link
maybeNewUrl [(Link, a)]
tree
urlIsNew :: [(a, Url)] -> HrefURI -> Bool
urlIsNew :: forall a. [(a, Url)] -> Url -> Bool
urlIsNew [] Url
uri = Bool
True
urlIsNew ((a, Url)
branch:[(a, Url)]
tree) Url
uri
| Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece)))
-> Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece))) -> Bool
forall (f :: * -> *) a. (Eq1 f, Eq a) => f a -> f a -> Bool
eq1 ((URI -> Maybe (Bool, NonEmpty (RText 'PathPiece)))
-> Maybe URI -> Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece)))
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URI -> Maybe (Bool, NonEmpty (RText 'PathPiece))
uriPath (Url -> Maybe URI
mkURI' (Url
uri))) ((URI -> Maybe (Bool, NonEmpty (RText 'PathPiece)))
-> Maybe URI -> Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece)))
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URI -> Maybe (Bool, NonEmpty (RText 'PathPiece))
uriPath (Url -> Maybe URI
mkURI' ((a, Url) -> Url
forall a b. (a, b) -> b
snd (a, Url)
branch))) = Bool
False
| Bool
otherwise = [(a, Url)] -> Url -> Bool
forall a. [(a, Url)] -> Url -> Bool
urlIsNew [(a, Url)]
tree Url
uri
where
mkURI' :: String -> Maybe URI
mkURI' :: Url -> Maybe URI
mkURI' Url
url = Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Url -> Text
pack Url
url)
maybeNewUrl :: [(Link, a)] -> Link -> Maybe Link
maybeNewUrl :: forall a. [(Link, a)] -> Link -> Maybe Link
maybeNewUrl [] Link
uri = Link -> Maybe Link
forall a. a -> Maybe a
Just Link
uri
maybeNewUrl ((Link, a)
branch:[(Link, a)]
tree) Link
uri =
if Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece)))
-> Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece))) -> Bool
forall (f :: * -> *) a. (Eq1 f, Eq a) => f a -> f a -> Bool
eq1 ((URI -> Maybe (Bool, NonEmpty (RText 'PathPiece)))
-> Maybe URI -> Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece)))
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URI -> Maybe (Bool, NonEmpty (RText 'PathPiece))
uriPath (Url -> Maybe URI
mkURI' (Link -> Url
forall a. IsLink a => a -> Url
renderLink Link
uri))) ((URI -> Maybe (Bool, NonEmpty (RText 'PathPiece)))
-> Maybe URI -> Maybe (Maybe (Bool, NonEmpty (RText 'PathPiece)))
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap URI -> Maybe (Bool, NonEmpty (RText 'PathPiece))
uriPath (Url -> Maybe URI
mkURI' (Url -> Maybe URI) -> ((Link, a) -> Url) -> (Link, a) -> Maybe URI
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Link -> Url
forall a. IsLink a => a -> Url
renderLink (Link -> Url) -> ((Link, a) -> Link) -> (Link, a) -> Url
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Link, a) -> Link
forall a b. (a, b) -> a
fst ((Link, a) -> Maybe URI) -> (Link, a) -> Maybe URI
forall a b. (a -> b) -> a -> b
$ (Link, a)
branch))
then Maybe Link
forall a. Maybe a
Nothing
else [(Link, a)] -> Link -> Maybe Link
forall a. [(Link, a)] -> Link -> Maybe Link
maybeNewUrl [(Link, a)]
tree Link
uri
where
mkURI' :: String -> Maybe URI
mkURI' :: Url -> Maybe URI
mkURI' Url
url = Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Url -> Text
pack Url
url)
maybeUsefulUrl :: Link -> Link -> Maybe Link
maybeUsefulUrl :: Link -> Link -> Maybe Link
maybeUsefulUrl (Link Url
baseUrl) Link
url = do
Link -> Maybe Link
noJSorShit Link
url
Link -> Maybe Url
numberOfQueryParamsIsZero Link
url
if Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isInfixOf Url
baseUrl (Link -> Url
forall a. IsLink a => a -> Url
renderLink Link
url) then Link -> Maybe Link
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return Link
url else Maybe Link
forall a. Maybe a
Nothing
Link -> Maybe Link
allowableEndings Link
url
where
noJSorShit :: Link -> Maybe Link
noJSorShit :: Link -> Maybe Link
noJSorShit Link
link =
if (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Bool -> [Bool] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Bool
True (Link -> [Url] -> [Bool]
urlContains Link
link [Url
"javascript", Url
"about", Url
"help", Url
"#"]))
then Link -> Maybe Link
forall a. a -> Maybe a
Just Link
url
else Maybe Link
forall a. Maybe a
Nothing
urlContains :: Link -> [String] -> [Bool]
urlContains :: Link -> [Url] -> [Bool]
urlContains (Link Url
url) [Url]
icases = (Url -> Bool) -> [Url] -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (((Url -> Url -> Bool) -> Url -> Url -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isInfixOf) ((Char -> Char) -> Url -> Url
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Char
toLower Url
url)) [Url]
icases
allowableEndings :: Link -> Maybe Link
allowableEndings Link
url =
let lastPath :: Url
lastPath = Url -> Maybe Url -> Url
forall a. a -> Maybe a -> a
fromMaybe Url
"" (Maybe Url -> Url) -> Maybe Url -> Url
forall a b. (a -> b) -> a -> b
$ Link -> Maybe Url
getLastPath Link
url
in
if (Char -> Url -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Char
'.' Url
lastPath)
then Url -> Link -> Maybe Link
forall {a}. Url -> a -> Maybe a
allowableFile Url
lastPath Link
url
else Link -> Maybe Link
forall a. a -> Maybe a
Just Link
url
allowableFile :: Url -> a -> Maybe a
allowableFile Url
endPath a
url =
if Bool -> [Bool] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem Bool
True ([Bool] -> Bool) -> [Bool] -> Bool
forall a b. (a -> b) -> a -> b
$ (Url -> Bool) -> [Url] -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\Url
x -> Url -> Url -> Bool
forall a. Eq a => [a] -> [a] -> Bool
isSuffixOf Url
x ((Char -> Char) -> Url -> Url
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Char
toLower Url
endPath)) [Url]
allowed
then a -> Maybe a
forall a. a -> Maybe a
Just a
url
else Maybe a
forall a. Maybe a
Nothing
where allowed :: [Url]
allowed = [Url
".aspx", Url
".html", Url
".pdf", Url
".php"]
getLastPath :: Link -> Maybe String
getLastPath :: Link -> Maybe Url
getLastPath (Link Url
url) = do
URI
x <- Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Text -> Maybe URI) -> Text -> Maybe URI
forall a b. (a -> b) -> a -> b
$ Url -> Text
pack Url
url
(Bool, NonEmpty (RText 'PathPiece))
x' <- URI -> Maybe (Bool, NonEmpty (RText 'PathPiece))
uriPath URI
x
Url -> Maybe Url
forall a. a -> Maybe a
Just (Url -> Maybe Url)
-> ((Bool, NonEmpty (RText 'PathPiece)) -> Url)
-> (Bool, NonEmpty (RText 'PathPiece))
-> Maybe Url
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Url
unpack (Text -> Url)
-> ((Bool, NonEmpty (RText 'PathPiece)) -> Text)
-> (Bool, NonEmpty (RText 'PathPiece))
-> Url
forall b c a. (b -> c) -> (a -> b) -> a -> c
. RText 'PathPiece -> Text
forall (l :: RTextLabel). RText l -> Text
unRText (RText 'PathPiece -> Text)
-> ((Bool, NonEmpty (RText 'PathPiece)) -> RText 'PathPiece)
-> (Bool, NonEmpty (RText 'PathPiece))
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty (RText 'PathPiece) -> RText 'PathPiece
forall a. NonEmpty a -> a
NE.last (NonEmpty (RText 'PathPiece) -> RText 'PathPiece)
-> ((Bool, NonEmpty (RText 'PathPiece))
-> NonEmpty (RText 'PathPiece))
-> (Bool, NonEmpty (RText 'PathPiece))
-> RText 'PathPiece
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Bool, NonEmpty (RText 'PathPiece)) -> NonEmpty (RText 'PathPiece)
forall a b. (a, b) -> b
snd ((Bool, NonEmpty (RText 'PathPiece)) -> Maybe Url)
-> (Bool, NonEmpty (RText 'PathPiece)) -> Maybe Url
forall a b. (a -> b) -> a -> b
$ (Bool, NonEmpty (RText 'PathPiece))
x'
usefulNewUrls :: Link -> [(Link, a)] -> [Link] -> [Maybe Link]
usefulNewUrls :: forall a. Link -> [(Link, a)] -> [Link] -> [Maybe Link]
usefulNewUrls Link
_ [(Link, a)]
_ [] = []
usefulNewUrls Link
baseUrl [(Link, a)]
tree (Link
link:[Link]
links) = (Link -> [(Link, a)] -> Link -> Maybe Link
forall a. Link -> [(Link, a)] -> Link -> Maybe Link
maybeUsefulNewUrl Link
baseUrl [(Link, a)]
tree Link
link) Maybe Link -> [Maybe Link] -> [Maybe Link]
forall a. a -> [a] -> [a]
: Link -> [(Link, a)] -> [Link] -> [Maybe Link]
forall a. Link -> [(Link, a)] -> [Link] -> [Maybe Link]
usefulNewUrls Link
baseUrl [(Link, a)]
tree [Link]
links
usefulUrls :: Link -> [Link] -> [Maybe Link]
usefulUrls :: Link -> [Link] -> [Maybe Link]
usefulUrls Link
baseUrl (Link
link:[Link]
links) = Link -> Link -> Maybe Link
maybeUsefulUrl Link
baseUrl Link
link Maybe Link -> [Maybe Link] -> [Maybe Link]
forall a. a -> [a] -> [a]
: Link -> [Link] -> [Maybe Link]
usefulUrls Link
baseUrl [Link]
links
numberOfQueryParamsIsZero :: Link -> Maybe String
numberOfQueryParamsIsZero :: Link -> Maybe Url
numberOfQueryParamsIsZero (Link Url
uri) = do
URI
x <- Text -> Maybe URI
forall (m :: * -> *). MonadThrow m => Text -> m URI
mkURI (Url -> Text
pack Url
uri)
if [QueryParam] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (URI -> [QueryParam]
uriQuery URI
x) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then Url -> Maybe Url
forall a. a -> Maybe a
Just Url
uri
else Maybe Url
forall a. Maybe a
Nothing
deriveJSON defaultOptions ''Link