-- |
-- A minimal attoparsec-like parser that uses CPS to reduce allocations
-- and perform better than attoparsec, at least the way we use it in
-- hpgsql.
--
-- In benchmarks, this can improve performance by 12-15% materializing
-- query results.
module Hpgsql.SimpleParser
  ( Parser (..),
    ParseResult (..),
    parseOnly,
    take,
    endOfInput,
    match,
  )
where

import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Prelude hiding (take)

data ParseResult a
  = ParseFail !String
  | ParseOk !a
  deriving stock (Int -> ParseResult a -> ShowS
[ParseResult a] -> ShowS
ParseResult a -> String
(Int -> ParseResult a -> ShowS)
-> (ParseResult a -> String)
-> ([ParseResult a] -> ShowS)
-> Show (ParseResult a)
forall a. Show a => Int -> ParseResult a -> ShowS
forall a. Show a => [ParseResult a] -> ShowS
forall a. Show a => ParseResult a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> ParseResult a -> ShowS
showsPrec :: Int -> ParseResult a -> ShowS
$cshow :: forall a. Show a => ParseResult a -> String
show :: ParseResult a -> String
$cshowList :: forall a. Show a => [ParseResult a] -> ShowS
showList :: [ParseResult a] -> ShowS
Show)

-- | A parser that consumes a strict 'ByteString'.
newtype Parser a = Parser
  { forall a.
Parser a
-> forall r.
   ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
unParser ::
      forall r.
      ByteString ->
      (String -> r) ->
      -- \^ failure continuation
      (a -> ByteString -> r) ->
      -- \^ success continuation
      r
  }

instance Functor Parser where
  fmap :: forall a b. (a -> b) -> Parser a -> Parser b
fmap a -> b
f (Parser forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p) = (forall r.
 ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
-> Parser b
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
 -> Parser b)
-> (forall r.
    ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
-> Parser b
forall a b. (a -> b) -> a -> b
$ \ByteString
bs String -> r
kf b -> ByteString -> r
ks ->
    ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p ByteString
bs String -> r
kf (\a
a ByteString
bs' -> b -> ByteString -> r
ks (a -> b
f a
a) ByteString
bs')
  {-# INLINE fmap #-}

instance Applicative Parser where
  pure :: forall a. a -> Parser a
pure a
a = (forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
 -> Parser a)
-> (forall r.
    ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
forall a b. (a -> b) -> a -> b
$ \ByteString
bs String -> r
_ a -> ByteString -> r
ks -> a -> ByteString -> r
ks a
a ByteString
bs
  {-# INLINE pure #-}

  Parser forall r.
ByteString -> (String -> r) -> ((a -> b) -> ByteString -> r) -> r
pf <*> :: forall a b. Parser (a -> b) -> Parser a -> Parser b
<*> Parser forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
pa = (forall r.
 ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
-> Parser b
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
 -> Parser b)
-> (forall r.
    ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
-> Parser b
forall a b. (a -> b) -> a -> b
$ \ByteString
bs String -> r
kf b -> ByteString -> r
ks ->
    ByteString -> (String -> r) -> ((a -> b) -> ByteString -> r) -> r
forall r.
ByteString -> (String -> r) -> ((a -> b) -> ByteString -> r) -> r
pf ByteString
bs String -> r
kf (\a -> b
f ByteString
bs' -> ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
pa ByteString
bs' String -> r
kf (\a
a ByteString
bs'' -> b -> ByteString -> r
ks (a -> b
f a
a) ByteString
bs''))
  {-# INLINE (<*>) #-}

instance Monad Parser where
  return :: forall a. a -> Parser a
return = a -> Parser a
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
  {-# INLINE return #-}

  Parser forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p >>= :: forall a b. Parser a -> (a -> Parser b) -> Parser b
>>= a -> Parser b
k = (forall r.
 ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
-> Parser b
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
 -> Parser b)
-> (forall r.
    ByteString -> (String -> r) -> (b -> ByteString -> r) -> r)
-> Parser b
forall a b. (a -> b) -> a -> b
$ \ByteString
bs String -> r
kf b -> ByteString -> r
ks ->
    ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p ByteString
bs String -> r
kf (\a
a ByteString
bs' -> Parser b
-> forall r.
   ByteString -> (String -> r) -> (b -> ByteString -> r) -> r
forall a.
Parser a
-> forall r.
   ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
unParser (a -> Parser b
k a
a) ByteString
bs' String -> r
kf b -> ByteString -> r
ks)
  {-# INLINE (>>=) #-}

instance MonadFail Parser where
  fail :: forall a. String -> Parser a
fail String
msg = (forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
 -> Parser a)
-> (forall r.
    ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
forall a b. (a -> b) -> a -> b
$ \ByteString
_ String -> r
kf a -> ByteString -> r
_ -> String -> r
kf String
msg
  {-# INLINE fail #-}

-- | Run a parser and return either an error message or the parsed value,
-- using the strict 'ParseResult' type. Any unconsumed trailing input is
-- discarded.
parseOnly :: Parser a -> ByteString -> ParseResult a
parseOnly :: forall a. Parser a -> ByteString -> ParseResult a
parseOnly (Parser forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p) ByteString
bs = ByteString
-> (String -> ParseResult a)
-> (a -> ByteString -> ParseResult a)
-> ParseResult a
forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p ByteString
bs String -> ParseResult a
forall a. String -> ParseResult a
ParseFail (\a
a ByteString
_ -> a -> ParseResult a
forall a. a -> ParseResult a
ParseOk a
a)
{-# INLINE parseOnly #-}

-- | Consume exactly @n@ bytes of input, failing if fewer than @n@ bytes
-- remain.
take :: Int -> Parser ByteString
take :: Int -> Parser ByteString
take Int
n = (forall r.
 ByteString
 -> (String -> r) -> (ByteString -> ByteString -> r) -> r)
-> Parser ByteString
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString
  -> (String -> r) -> (ByteString -> ByteString -> r) -> r)
 -> Parser ByteString)
-> (forall r.
    ByteString
    -> (String -> r) -> (ByteString -> ByteString -> r) -> r)
-> Parser ByteString
forall a b. (a -> b) -> a -> b
$ \ByteString
bs String -> r
kf ByteString -> ByteString -> r
ks ->
  -- Special-casing n>0 helps reduce memory usage
  -- by ~1.5% in our benchmarks without a measurable
  -- difference in run time
  if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
    then
      if ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n
        then case Int -> ByteString -> (ByteString, ByteString)
BS.splitAt Int
n ByteString
bs of
          (!ByteString
h, !ByteString
t) -> ByteString -> ByteString -> r
ks ByteString
h ByteString
t
        else String -> r
kf (String
"take: wanted " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show Int
n String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" bytes but only " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Int -> String
forall a. Show a => a -> String
show (ByteString -> Int
BS.length ByteString
bs) String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
" remain")
    else
      ByteString -> ByteString -> r
ks ByteString
forall a. Monoid a => a
mempty ByteString
bs
{-# INLINE take #-}

-- | Succeeds only when the input has been fully consumed.
endOfInput :: Parser ()
endOfInput :: Parser ()
endOfInput = (forall r.
 ByteString -> (String -> r) -> (() -> ByteString -> r) -> r)
-> Parser ()
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString -> (String -> r) -> (() -> ByteString -> r) -> r)
 -> Parser ())
-> (forall r.
    ByteString -> (String -> r) -> (() -> ByteString -> r) -> r)
-> Parser ()
forall a b. (a -> b) -> a -> b
$ \ByteString
bs String -> r
kf () -> ByteString -> r
ks ->
  if ByteString -> Bool
BS.null ByteString
bs then () -> ByteString -> r
ks () ByteString
bs else String -> r
kf String
"endOfInput: input remaining"
{-# INLINE endOfInput #-}

-- | Run a parser and additionally return the slice of input it consumed.
-- Because the input is a strict 'ByteString', the returned slice is a view
-- over the original buffer and allocates no extra memory.
match :: Parser a -> Parser (ByteString, a)
match :: forall a. Parser a -> Parser (ByteString, a)
match (Parser forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p) = (forall r.
 ByteString
 -> (String -> r) -> ((ByteString, a) -> ByteString -> r) -> r)
-> Parser (ByteString, a)
forall a.
(forall r.
 ByteString -> (String -> r) -> (a -> ByteString -> r) -> r)
-> Parser a
Parser ((forall r.
  ByteString
  -> (String -> r) -> ((ByteString, a) -> ByteString -> r) -> r)
 -> Parser (ByteString, a))
-> (forall r.
    ByteString
    -> (String -> r) -> ((ByteString, a) -> ByteString -> r) -> r)
-> Parser (ByteString, a)
forall a b. (a -> b) -> a -> b
$ \ByteString
bs String -> r
kf (ByteString, a) -> ByteString -> r
ks ->
  ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
forall r.
ByteString -> (String -> r) -> (a -> ByteString -> r) -> r
p
    ByteString
bs
    String -> r
kf
    ( \a
a ByteString
bs' ->
        let !consumed :: ByteString
consumed = Int -> ByteString -> ByteString
BS.take (ByteString -> Int
BS.length ByteString
bs Int -> Int -> Int
forall a. Num a => a -> a -> a
- ByteString -> Int
BS.length ByteString
bs') ByteString
bs
         in (ByteString, a) -> ByteString -> r
ks (ByteString
consumed, a
a) ByteString
bs'
    )
{-# INLINE match #-}