{-# LANGUAGE FlexibleContexts #-}

module Scrappy.Grep.DSL.Interpreter
  ( interpret
  , interpretWithRefs
  , InterpreterError(..)
  ) where

import Scrappy.Grep.DSL
import Scrappy.Scrape (ScraperT)

import Text.Parsec hiding ((<|>))
import Data.Map (Map)
import qualified Data.Map as Map
import Control.Applicative ((<|>))

data InterpreterError
  = UnknownRef String
  deriving (Int -> InterpreterError -> ShowS
[InterpreterError] -> ShowS
InterpreterError -> Html
(Int -> InterpreterError -> ShowS)
-> (InterpreterError -> Html)
-> ([InterpreterError] -> ShowS)
-> Show InterpreterError
forall a.
(Int -> a -> ShowS) -> (a -> Html) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> InterpreterError -> ShowS
showsPrec :: Int -> InterpreterError -> ShowS
$cshow :: InterpreterError -> Html
show :: InterpreterError -> Html
$cshowList :: [InterpreterError] -> ShowS
showList :: [InterpreterError] -> ShowS
Show, InterpreterError -> InterpreterError -> Bool
(InterpreterError -> InterpreterError -> Bool)
-> (InterpreterError -> InterpreterError -> Bool)
-> Eq InterpreterError
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: InterpreterError -> InterpreterError -> Bool
== :: InterpreterError -> InterpreterError -> Bool
$c/= :: InterpreterError -> InterpreterError -> Bool
/= :: InterpreterError -> InterpreterError -> Bool
Eq)

-- | Interpret AST without named references (refs will fail)
interpret :: ParserExpr -> Either InterpreterError (ScraperT String)
interpret :: ParserExpr -> Either InterpreterError (ScraperT Html)
interpret = Map Html (ScraperT Html)
-> ParserExpr -> Either InterpreterError (ScraperT Html)
interpretWithRefs Map Html (ScraperT Html)
forall k a. Map k a
Map.empty

-- | Interpret AST with named parser references
-- The Map contains pre-interpreted parsers for each ref name
interpretWithRefs :: Map String (ScraperT String) -> ParserExpr -> Either InterpreterError (ScraperT String)
interpretWithRefs :: Map Html (ScraperT Html)
-> ParserExpr -> Either InterpreterError (ScraperT Html)
interpretWithRefs Map Html (ScraperT Html)
refs = ParserExpr -> Either InterpreterError (ScraperT Html)
go
  where
    go :: ParserExpr -> Either InterpreterError (ScraperT String)
    go :: ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
expr = case ParserExpr
expr of
      -- Primitives (return matched text as String)
      PChar Char
c     -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> ParsecT Html () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
c
      PString Html
s   -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ Html -> ScraperT Html
forall s (m :: * -> *) u.
Stream s m Char =>
Html -> ParsecT s u m Html
string Html
s
      ParserExpr
PAnyChar    -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Html () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar
      ParserExpr
PDigit      -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Html () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
      ParserExpr
PLetter     -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Html () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
letter
      ParserExpr
PAlphaNum   -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Html () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
alphaNum
      ParserExpr
PSpace      -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Html () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
space
      ParserExpr
PSpaces     -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ ParsecT Html () Identity Char -> ScraperT Html
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT Html () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
space
      ParserExpr
PNewline    -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Html () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
newline
      POneOf Html
cs   -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Html -> ParsecT Html () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Html -> ParsecT s u m Char
oneOf Html
cs
      PNoneOf Html
cs  -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ (Char -> ShowS
forall a. a -> [a] -> [a]
:[]) (Char -> Html) -> ParsecT Html () Identity Char -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Html -> ParsecT Html () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Html -> ParsecT s u m Char
noneOf Html
cs

      -- Sequence: run p1 then p2, return p2's result
      PSeq ParserExpr
p1 ParserExpr
p2 -> do
        ScraperT Html
parser1 <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p1
        ScraperT Html
parser2 <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p2
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ ScraperT Html
parser1 ScraperT Html -> ScraperT Html -> ScraperT Html
forall a b.
ParsecT Html () Identity a
-> ParsecT Html () Identity b -> ParsecT Html () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ScraperT Html
parser2

      -- Sequence with concatenation: run both, concatenate results
      PSeqConcat ParserExpr
p1 ParserExpr
p2 -> do
        ScraperT Html
parser1 <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p1
        ScraperT Html
parser2 <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p2
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ Html -> ShowS
forall a. [a] -> [a] -> [a]
(++) (Html -> ShowS) -> ScraperT Html -> ParsecT Html () Identity ShowS
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ScraperT Html
parser1 ParsecT Html () Identity ShowS -> ScraperT Html -> ScraperT Html
forall a b.
ParsecT Html () Identity (a -> b)
-> ParsecT Html () Identity a -> ParsecT Html () Identity b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ScraperT Html
parser2

      -- Alternative: try p1, if fails try p2
      PAlt ParserExpr
p1 ParserExpr
p2 -> do
        ScraperT Html
parser1 <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p1
        ScraperT Html
parser2 <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p2
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ ScraperT Html -> ScraperT Html
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ScraperT Html
parser1 ScraperT Html -> ScraperT Html -> ScraperT Html
forall a.
ParsecT Html () Identity a
-> ParsecT Html () Identity a -> ParsecT Html () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ScraperT Html
parser2

      -- Many: zero or more
      PMany ParserExpr
p -> do
        ScraperT Html
parser <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ [Html] -> Html
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([Html] -> Html)
-> ParsecT Html () Identity [Html] -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ScraperT Html -> ParsecT Html () Identity [Html]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (ScraperT Html -> ScraperT Html
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ScraperT Html
parser)

      -- Some: one or more
      PSome ParserExpr
p -> do
        ScraperT Html
parser <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ [Html] -> Html
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([Html] -> Html)
-> ParsecT Html () Identity [Html] -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ScraperT Html -> ParsecT Html () Identity [Html]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 (ScraperT Html -> ScraperT Html
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ScraperT Html
parser)

      -- Optional: zero or one
      POptional ParserExpr
p -> do
        ScraperT Html
parser <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ Html -> ScraperT Html -> ScraperT Html
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option Html
"" (ScraperT Html -> ScraperT Html
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ScraperT Html
parser)

      -- Try: backtracking
      PTry ParserExpr
p -> do
        ScraperT Html
parser <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ ScraperT Html -> ScraperT Html
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ScraperT Html
parser

      -- Between: parse between delimiters
      PBetween Char
open Char
close ParserExpr
p -> do
        ScraperT Html
parser <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ do
          Char
o <- Char -> ParsecT Html () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
open
          Html
result <- ScraperT Html
parser
          Char
c <- Char -> ParsecT Html () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
close
          Html -> ScraperT Html
forall a. a -> ParsecT Html () Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Html -> ScraperT Html) -> Html -> ScraperT Html
forall a b. (a -> b) -> a -> b
$ [Char
o] Html -> ShowS
forall a. [a] -> [a] -> [a]
++ Html
result Html -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char
c]

      -- Count: exactly n repetitions
      PCount Int
n ParserExpr
p -> do
        ScraperT Html
parser <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ [Html] -> Html
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([Html] -> Html)
-> ParsecT Html () Identity [Html] -> ScraperT Html
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> ScraperT Html -> ParsecT Html () Identity [Html]
forall s (m :: * -> *) t u a.
Stream s m t =>
Int -> ParsecT s u m a -> ParsecT s u m [a]
count Int
n ScraperT Html
parser

      -- ManyTill: non-greedy match until end
      PManyTill ParserExpr
p ParserExpr
end -> do
        ScraperT Html
parserP <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
p
        ScraperT Html
parserEnd <- ParserExpr -> Either InterpreterError (ScraperT Html)
go ParserExpr
end
        ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right (ScraperT Html -> Either InterpreterError (ScraperT Html))
-> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ ScraperT Html -> ScraperT Html -> ScraperT Html
manyTillConcat ScraperT Html
parserP ScraperT Html
parserEnd

      -- Reference: lookup in refs map
      PRef Html
name -> case Html -> Map Html (ScraperT Html) -> Maybe (ScraperT Html)
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Html
name Map Html (ScraperT Html)
refs of
        Maybe (ScraperT Html)
Nothing -> InterpreterError -> Either InterpreterError (ScraperT Html)
forall a b. a -> Either a b
Left (InterpreterError -> Either InterpreterError (ScraperT Html))
-> InterpreterError -> Either InterpreterError (ScraperT Html)
forall a b. (a -> b) -> a -> b
$ Html -> InterpreterError
UnknownRef Html
name
        Just ScraperT Html
parser -> ScraperT Html -> Either InterpreterError (ScraperT Html)
forall a b. b -> Either a b
Right ScraperT Html
parser

-- | manyTill that concatenates all matched strings including the end
manyTillConcat :: ScraperT String -> ScraperT String -> ScraperT String
manyTillConcat :: ScraperT Html -> ScraperT Html -> ScraperT Html
manyTillConcat ScraperT Html
p ScraperT Html
end = ScraperT Html
go
  where
    go :: ScraperT Html
go = (ScraperT Html -> ScraperT Html
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ScraperT Html
end) ScraperT Html -> ScraperT Html -> ScraperT Html
forall a.
ParsecT Html () Identity a
-> ParsecT Html () Identity a -> ParsecT Html () Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> do
      Html
x <- ScraperT Html
p
      Html
xs <- ScraperT Html
go
      Html -> ScraperT Html
forall a. a -> ParsecT Html () Identity a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Html
x Html -> ShowS
forall a. [a] -> [a] -> [a]
++ Html
xs)