{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK not-home #-}

-- | This is an internal module. You probably don't need to import this. Import
-- "Regex.Text" instead.
--
-- = WARNING
--
-- Definitions in this module allow violating invariants that would otherwise be
-- guaranteed by non-internal modules. Use at your own risk!
--
module Regex.Internal.Text
  (
    TextToken(..)
  , REText
  , textTokenFoldr

  , token
  , satisfy
  , char
  , charIgnoreCase
  , anyChar
  , oneOf
  , text
  , textIgnoreCase
  , manyText
  , someText
  , manyTextMin
  , someTextMin
  , manyTextOf
  , someTextOf
  , manyTextOfMin
  , someTextOfMin

  , naturalDec
  , integerDec
  , naturalHex
  , integerHex
  , wordRangeDec
  , intRangeDec
  , wordRangeHex
  , intRangeHex
  , wordDecN
  , wordHexN

  , toMatch
  , withMatch

  , reParse
  , ParserText
  , parse
  , parseSure

  , find
  , findAll
  , splitOn
  , replace
  , replaceAll
  ) where

import Control.Applicative ((<|>))
import qualified Control.Applicative as Ap
import Data.Char (ord)
import qualified Data.Foldable as F
import Data.Maybe (fromMaybe)
import Numeric.Natural (Natural)
import Data.Text (Text)
import qualified Data.Text as T
#ifdef __GLASGOW_HASKELL__
import qualified Data.Text.Array as TArray
import qualified Data.Text.Internal as TInternal
import qualified Data.Text.Unsafe as TUnsafe
import qualified Data.Text.Internal.Encoding.Utf8 as TInternalUtf8
#else
import Control.Applicative (many, some)
import qualified Regex.Internal.List as RL
#endif

import Data.CharSet (CharSet)
import qualified Data.CharSet as CS
import Regex.Internal.Parser (Parser)
import qualified Regex.Internal.Parser as P
import Regex.Internal.Regex (RE(..), Greediness(..), Strictness(..))
import qualified Regex.Internal.Regex as R
import qualified Regex.Internal.Num as RNum
import qualified Regex.Internal.Generated.CaseFold as CF

----------------------
-- Token and Text REs
----------------------

-- | The token type used for parsing @Text@.

#ifdef __GLASGOW_HASKELL__
-- This module uses RE TextToken for Text regexes instead of simply RE Char to
-- support Text slicing. It does mean that use cases not using slicing pay a
-- small cost, but it is not worth having two separate Text regex APIs.
--
-- Slicing is made possible by the unsafeAdjacentAppend function. Of course,
-- this means that REs using it MUST NOT be used with multiple Texts, such as
-- trying to parse chunks of a lazy Text.
data TextToken = TextToken
  { TextToken -> Array
tArr     :: {-# UNPACK #-} !TArray.Array
  , TextToken -> Int
tOffset  :: {-# UNPACK #-} !Int
  , TextToken -> Char
tChar    :: {-# UNPACK #-} !Char
  }
#else
-- No slicing for non-GHC. This means that there is no performance advantage
-- over Regex.List, but it is still convenient to use when working with Text.
newtype TextToken = TextToken { tChar :: Char }
#endif

-- | A type alias for convenience.
--
-- A function which accepts a @RE c a@ will accept a @REText a@.
type REText = RE TextToken

-- | A type alias for convenience.
--
-- A function which accepts a @Parser c a@ will accept a @ParserText a@.
type ParserText = Parser TextToken

-- | Parse a @Char@ into an @a@ if the given function returns @Just@.
token :: (Char -> Maybe a) -> REText a
token :: forall a. (Char -> Maybe a) -> REText a
token Char -> Maybe a
t = (TextToken -> Maybe a) -> RE TextToken a
forall c a. (c -> Maybe a) -> RE c a
R.token (\ !TextToken
tok -> Char -> Maybe a
t (TextToken -> Char
tChar TextToken
tok))
{-# INLINE token #-}

-- | Parse a @Char@ if it satisfies the given predicate.
satisfy :: (Char -> Bool) -> REText Char
satisfy :: (Char -> Bool) -> REText Char
satisfy Char -> Bool
p = (Char -> Maybe Char) -> REText Char
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe Char) -> REText Char)
-> (Char -> Maybe Char) -> REText Char
forall a b. (a -> b) -> a -> b
$ \Char
c -> if Char -> Bool
p Char
c then Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c else Maybe Char
forall a. Maybe a
Nothing
{-# INLINE satisfy #-}

-- | Parse the given @Char@.
char :: Char -> REText Char
char :: Char -> REText Char
char !Char
c = (Char -> Bool) -> REText Char
satisfy (Char
cChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==)

-- | Parse the given @Char@, ignoring case.
--
-- Comparisons are performed after applying
-- [simple case folding](https://www.unicode.org/reports/tr44/#Simple_Case_Folding)
-- as described by the Unicode standard.
charIgnoreCase :: Char -> REText Char
charIgnoreCase :: Char -> REText Char
charIgnoreCase Char
c = (Char -> Bool) -> REText Char
satisfy ((Char -> Bool) -> REText Char) -> (Char -> Bool) -> REText Char
forall a b. (a -> b) -> a -> b
$ (Char
c'Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) (Char -> Bool) -> (Char -> Char) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
CF.caseFoldSimple
  where
    !c' :: Char
c' = Char -> Char
CF.caseFoldSimple Char
c
-- See Note [Why simple case fold]

-- | Parse any @Char@.
anyChar :: REText Char
anyChar :: REText Char
anyChar = (Char -> Maybe Char) -> REText Char
forall a. (Char -> Maybe a) -> REText a
token Char -> Maybe Char
forall a. a -> Maybe a
Just

-- | Parse a @Char@ if it is a member of the @CharSet@.
oneOf :: CharSet -> REText Char
oneOf :: CharSet -> REText Char
oneOf !CharSet
cs = (Char -> Bool) -> REText Char
satisfy (Char -> CharSet -> Bool
`CS.member` CharSet
cs)

-- | Parse the given @Text@.
text :: Text -> REText Text
text :: Text -> REText Text
text Text
t =
  Text
t Text -> RE TextToken () -> REText Text
forall a b. a -> RE TextToken b -> RE TextToken a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$
#ifdef __GLASGOW_HASKELL__
    (Char -> RE TextToken () -> RE TextToken ())
-> RE TextToken () -> Text -> RE TextToken ()
forall a. (Char -> a -> a) -> a -> Text -> a
T.foldr'
#else
    T.foldr
#endif
      (\Char
c RE TextToken ()
z -> Char -> REText Char
char Char
c REText Char -> RE TextToken () -> RE TextToken ()
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> RE TextToken ()
z) (() -> RE TextToken ()
forall a. a -> RE TextToken a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()) Text
t

-- | Parse the given @Text@, ignoring case.
--
-- Comparisons are performed after applying
-- [simple case folding](https://www.unicode.org/reports/tr44/#Simple_Case_Folding)
-- as described by the Unicode standard.
textIgnoreCase :: Text -> REText Text
textIgnoreCase :: Text -> REText Text
textIgnoreCase Text
t =
#ifdef __GLASGOW_HASKELL__
  (Char -> REText Text -> REText Text)
-> REText Text -> Text -> REText Text
forall a. (Char -> a -> a) -> a -> Text -> a
T.foldr' (\Char
c REText Text
cs -> (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
unsafeAdjacentAppend (Char -> REText Text
ignoreCaseTokenMatch Char
c) REText Text
cs)
           (Text -> REText Text
forall a. a -> RE TextToken a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
T.empty)
           Text
t
#else
  T.pack <$> T.foldr f (pure []) t
  where
    f c z = Ap.liftA2 (:) (satisfy (\c'' -> CF.caseFoldSimple c'' == c')) z
      where
        !c' = CF.caseFoldSimple c
#endif
-- See Note [Why simple case fold]

-- | Parse any @Text@. Biased towards matching more.
manyText :: REText Text
manyText :: REText Text
manyText =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlMany' Text -> Text -> Text
unsafeAdjacentAppend Text
T.empty REText Text
anyTokenMatch
#else
  T.pack <$> many anyChar
#endif

-- | Parse any non-empty @Text@. Biased towards matching more.
someText :: REText Text
someText :: REText Text
someText =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
unsafeAdjacentAppend REText Text
anyTokenMatch REText Text
manyText
#else
  T.pack <$> some anyChar
#endif

-- | Parse any @Text@. Minimal, i.e. biased towards matching less.
manyTextMin :: REText Text
manyTextMin :: REText Text
manyTextMin =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlManyMin' Text -> Text -> Text
unsafeAdjacentAppend Text
T.empty REText Text
anyTokenMatch
#else
  T.pack <$> R.manyMin anyChar
#endif

-- | Parse any non-empty @Text@. Minimal, i.e. biased towards matching less.
someTextMin :: REText Text
someTextMin :: REText Text
someTextMin =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
unsafeAdjacentAppend REText Text
anyTokenMatch REText Text
manyTextMin
#else
  T.pack <$> R.someMin anyChar
#endif

-- | Parse any @Text@ containing members of the @CharSet@.
-- Biased towards matching more.
manyTextOf :: CharSet -> REText Text
manyTextOf :: CharSet -> REText Text
manyTextOf !CharSet
cs =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlMany' Text -> Text -> Text
unsafeAdjacentAppend Text
T.empty (CharSet -> REText Text
oneOfTokenMatch CharSet
cs)
#else
  T.pack <$> many (satisfy (`CS.member` cs))
#endif

-- | Parse any non-empty @Text@ containing members of the @CharSet@.
-- Biased towards matching more.
someTextOf :: CharSet -> REText Text
someTextOf :: CharSet -> REText Text
someTextOf !CharSet
cs =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
unsafeAdjacentAppend (CharSet -> REText Text
oneOfTokenMatch CharSet
cs) (CharSet -> REText Text
manyTextOf CharSet
cs)
#else
  T.pack <$> some (satisfy (`CS.member` cs))
#endif

-- | Parse any @Text@ containing members of the @CharSet@.
-- Minimal, i.e. biased towards matching less.
manyTextOfMin :: CharSet -> REText Text
manyTextOfMin :: CharSet -> REText Text
manyTextOfMin !CharSet
cs =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> Text -> REText Text -> REText Text
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlManyMin' Text -> Text -> Text
unsafeAdjacentAppend Text
T.empty (CharSet -> REText Text
oneOfTokenMatch CharSet
cs)
#else
  T.pack <$> R.manyMin (satisfy (`CS.member` cs))
#endif

-- | Parse any non-empty @Text@ containing members of the @CharSet@.
-- Minimal, i.e. biased towards matching less.
someTextOfMin :: CharSet -> REText Text
someTextOfMin :: CharSet -> REText Text
someTextOfMin !CharSet
cs =
#ifdef __GLASGOW_HASKELL__
  (Text -> Text -> Text) -> REText Text -> REText Text -> REText Text
forall a1 a2 b c. (a1 -> a2 -> b) -> RE c a1 -> RE c a2 -> RE c b
R.liftA2' Text -> Text -> Text
unsafeAdjacentAppend (CharSet -> REText Text
oneOfTokenMatch CharSet
cs) (CharSet -> REText Text
manyTextOfMin CharSet
cs)
#else
  T.pack <$> R.someMin (satisfy (`CS.member` cs))
#endif

-----------------
-- Numeric REs
-----------------

-- | Parse a decimal @Natural@.
-- Leading zeros are not accepted. Biased towards matching more.
naturalDec :: REText Natural
naturalDec :: REText Natural
naturalDec = (Word -> Word -> RE TextToken Word) -> REText Natural
forall c. (Word -> Word -> RE c Word) -> RE c Natural
RNum.mkNaturalDec Word -> Word -> RE TextToken Word
digitRange

-- | Parse a decimal @Integer@. Parse an optional sign, @\'-\'@ or @\'+\'@,
-- followed by the given @RE@, followed by the absolute value of the integer.
-- Leading zeros are not accepted. Biased towards matching more.
integerDec :: REText a -> REText Integer
integerDec :: forall a. REText a -> REText Integer
integerDec REText a
sep = RE TextToken ()
-> RE TextToken () -> REText Natural -> REText Integer
forall c minus plus.
RE c minus -> RE c plus -> RE c Natural -> RE c Integer
RNum.mkSignedInteger RE TextToken ()
minus RE TextToken ()
plus (REText a
sep REText a -> REText Natural -> REText Natural
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> REText Natural
naturalDec)

-- | Parse a hexadecimal @Natural@. Both uppercase @\'A\'..\'F\'@ and lowercase
-- @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
naturalHex :: REText Natural
naturalHex :: REText Natural
naturalHex = (Word -> Word -> RE TextToken Word) -> REText Natural
forall c. (Word -> Word -> RE c Word) -> RE c Natural
RNum.mkNaturalHex Word -> Word -> RE TextToken Word
hexDigitRange

-- | Parse a hexadecimal @Integer@. Parse an optional sign, @\'-\'@ or @\'+\'@,
-- followed by the given @RE@, followed by the absolute value of the integer.
-- Both uppercase @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
integerHex :: REText a -> REText Integer
integerHex :: forall a. REText a -> REText Integer
integerHex REText a
sep = RE TextToken ()
-> RE TextToken () -> REText Natural -> REText Integer
forall c minus plus.
RE c minus -> RE c plus -> RE c Natural -> RE c Integer
RNum.mkSignedInteger RE TextToken ()
minus RE TextToken ()
plus (REText a
sep REText a -> REText Natural -> REText Natural
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> REText Natural
naturalHex)

-- | Parse a decimal @Word@ in the range @[low..high]@.
-- Leading zeros are not accepted. Biased towards matching more.
wordRangeDec :: (Word, Word) -> REText Word
wordRangeDec :: (Word, Word) -> RE TextToken Word
wordRangeDec (Word, Word)
lh = (Word -> Word -> RE TextToken Word)
-> (Word, Word) -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> (Word, Word) -> RE c Word
RNum.mkWordRangeDec Word -> Word -> RE TextToken Word
digitRange (Word, Word)
lh

-- | Parse a decimal @Int@ in the range @[low..high]@. Parse an optional sign,
-- @\'-\'@ or @\'+\'@, followed by the given @RE@, followed by the absolute
-- value of the integer.
-- Leading zeros are not accepted. Biased towards matching more.
intRangeDec :: REText a -> (Int, Int) -> REText Int
intRangeDec :: forall a. REText a -> (Int, Int) -> REText Int
intRangeDec REText a
sep (Int, Int)
lh =
  RE TextToken ()
-> RE TextToken ()
-> ((Word, Word) -> RE TextToken Word)
-> (Int, Int)
-> REText Int
forall c minus plus.
RE c minus
-> RE c plus
-> ((Word, Word) -> RE c Word)
-> (Int, Int)
-> RE c Int
RNum.mkSignedIntRange RE TextToken ()
minus RE TextToken ()
plus ((REText a
sep REText a -> RE TextToken Word -> RE TextToken Word
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>) (RE TextToken Word -> RE TextToken Word)
-> ((Word, Word) -> RE TextToken Word)
-> (Word, Word)
-> RE TextToken Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word, Word) -> RE TextToken Word
wordRangeDec) (Int, Int)
lh

-- | Parse a hexadecimal @Word@ in the range @[low..high]@. Both uppercase
-- @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
wordRangeHex :: (Word, Word) -> REText Word
wordRangeHex :: (Word, Word) -> RE TextToken Word
wordRangeHex (Word, Word)
lh = (Word -> Word -> RE TextToken Word)
-> (Word, Word) -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> (Word, Word) -> RE c Word
RNum.mkWordRangeHex Word -> Word -> RE TextToken Word
hexDigitRange (Word, Word)
lh

-- | Parse a hexadecimal @Int@ in the range @[low..high]@. Parse an optional
-- sign, @\'-\'@ or @\'+\'@, followed by the given @RE@, followed by the
-- absolute value of the integer.
-- Both uppercase @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are accepted.
-- Leading zeros are not accepted. Biased towards matching more.
intRangeHex :: REText a -> (Int, Int) -> REText Int
intRangeHex :: forall a. REText a -> (Int, Int) -> REText Int
intRangeHex REText a
sep (Int, Int)
lh =
  RE TextToken ()
-> RE TextToken ()
-> ((Word, Word) -> RE TextToken Word)
-> (Int, Int)
-> REText Int
forall c minus plus.
RE c minus
-> RE c plus
-> ((Word, Word) -> RE c Word)
-> (Int, Int)
-> RE c Int
RNum.mkSignedIntRange RE TextToken ()
minus RE TextToken ()
plus ((REText a
sep REText a -> RE TextToken Word -> RE TextToken Word
forall a b. RE TextToken a -> RE TextToken b -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>) (RE TextToken Word -> RE TextToken Word)
-> ((Word, Word) -> RE TextToken Word)
-> (Word, Word)
-> RE TextToken Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word, Word) -> RE TextToken Word
wordRangeHex) (Int, Int)
lh

-- | Parse a @Word@ of exactly n decimal digits, including any leading zeros.
-- Will not parse values that do not fit in a @Word@.
-- Biased towards matching more.
wordDecN :: Int -> REText Word
wordDecN :: Int -> RE TextToken Word
wordDecN Int
n = (Word -> Word -> RE TextToken Word) -> Int -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> Int -> RE c Word
RNum.mkWordDecN Word -> Word -> RE TextToken Word
digitRange Int
n

-- | Parse a @Word@ of exactly n hexadecimal digits, including any leading
-- zeros. Both uppercase @\'A\'..\'F\'@ and lowercase @\'a\'..\'f\'@ are
-- accepted. Will not parse values that do not fit in a @Word@.
-- Biased towards matching more.
wordHexN :: Int -> REText Word
wordHexN :: Int -> RE TextToken Word
wordHexN Int
n = (Word -> Word -> RE TextToken Word) -> Int -> RE TextToken Word
forall c. (Word -> Word -> RE c Word) -> Int -> RE c Word
RNum.mkWordHexN Word -> Word -> RE TextToken Word
hexDigitRange Int
n

minus, plus :: REText ()
minus :: RE TextToken ()
minus = (Char -> Maybe ()) -> RE TextToken ()
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe ()) -> RE TextToken ())
-> (Char -> Maybe ()) -> RE TextToken ()
forall a b. (a -> b) -> a -> b
$ \Char
c -> if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' then () -> Maybe ()
forall a. a -> Maybe a
Just () else Maybe ()
forall a. Maybe a
Nothing
plus :: RE TextToken ()
plus = (Char -> Maybe ()) -> RE TextToken ()
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe ()) -> RE TextToken ())
-> (Char -> Maybe ()) -> RE TextToken ()
forall a b. (a -> b) -> a -> b
$ \Char
c -> if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'+' then () -> Maybe ()
forall a. a -> Maybe a
Just () else Maybe ()
forall a. Maybe a
Nothing

-- l and h must be in [0..9]
digitRange :: Word -> Word -> REText Word
digitRange :: Word -> Word -> RE TextToken Word
digitRange !Word
l !Word
h = (Char -> Maybe Word) -> RE TextToken Word
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe Word) -> RE TextToken Word)
-> (Char -> Maybe Word) -> RE TextToken Word
forall a b. (a -> b) -> a -> b
$ \Char
c ->
  let d :: Word
d = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
  in if Word
l Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
d Bool -> Bool -> Bool
&& Word
d Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
h then Word -> Maybe Word
forall a. a -> Maybe a
Just Word
d else Maybe Word
forall a. Maybe a
Nothing

-- l and h must be in [0..15]
hexDigitRange :: Word -> Word -> REText Word
hexDigitRange :: Word -> Word -> RE TextToken Word
hexDigitRange !Word
l !Word
h = (Char -> Maybe Word) -> RE TextToken Word
forall a. (Char -> Maybe a) -> REText a
token ((Char -> Maybe Word) -> RE TextToken Word)
-> (Char -> Maybe Word) -> RE TextToken Word
forall a b. (a -> b) -> a -> b
$ \Char
c ->
  let dec :: Word
dec = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0')
      hexl :: Word
hexl = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'a')
      hexu :: Word
hexu = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'A')
  in do
    Word
d <- case () of
      ()
_ | Word
dec Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
9 -> Word -> Maybe Word
forall a. a -> Maybe a
Just Word
dec
        | Word
hexl Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
5 -> Word -> Maybe Word
forall a. a -> Maybe a
Just (Word -> Maybe Word) -> Word -> Maybe Word
forall a b. (a -> b) -> a -> b
$! Word
10 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
hexl
        | Word
hexu Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
5 -> Word -> Maybe Word
forall a. a -> Maybe a
Just (Word -> Maybe Word) -> Word -> Maybe Word
forall a b. (a -> b) -> a -> b
$! Word
10 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
hexu
        | Bool
otherwise -> Maybe Word
forall a. Maybe a
Nothing
    if Word
l Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
d Bool -> Bool -> Bool
&& Word
d Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
h then Word -> Maybe Word
forall a. a -> Maybe a
Just Word
d else Maybe Word
forall a. Maybe a
Nothing
-- TODO: This can surely be optimized

#ifdef __GLASGOW_HASKELL__
--------------------
-- Slicing helpers
--------------------

tokenToSlice :: TextToken -> Text
tokenToSlice :: TextToken -> Text
tokenToSlice TextToken
t =
  Array -> Int -> Int -> Text
TInternal.Text (TextToken -> Array
tArr TextToken
t) (TextToken -> Int
tOffset TextToken
t) (Char -> Int
TInternalUtf8.utf8Length (TextToken -> Char
tChar TextToken
t))

tokenMatch :: (TextToken -> Maybe a) -> REText Text
tokenMatch :: forall a. (TextToken -> Maybe a) -> REText Text
tokenMatch TextToken -> Maybe a
t = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token (\ !TextToken
tok -> TextToken -> Text
tokenToSlice TextToken
tok Text -> Maybe a -> Maybe Text
forall a b. a -> Maybe b -> Maybe a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ TextToken -> Maybe a
t TextToken
tok)

tokenWithMatch :: (TextToken -> Maybe a) -> REText (WithMatch a)
tokenWithMatch :: forall a. (TextToken -> Maybe a) -> REText (WithMatch a)
tokenWithMatch TextToken -> Maybe a
t = (TextToken -> Maybe (WithMatch a)) -> RE TextToken (WithMatch a)
forall c a. (c -> Maybe a) -> RE c a
R.token (\ !TextToken
tok -> Text -> a -> WithMatch a
forall a. Text -> a -> WithMatch a
WM (TextToken -> Text
tokenToSlice TextToken
tok) (a -> WithMatch a) -> Maybe a -> Maybe (WithMatch a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TextToken -> Maybe a
t TextToken
tok)

anyTokenMatch :: REText Text
anyTokenMatch :: REText Text
anyTokenMatch = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token (\TextToken
tok -> Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$! TextToken -> Text
tokenToSlice TextToken
tok)

ignoreCaseTokenMatch :: Char -> REText Text
ignoreCaseTokenMatch :: Char -> REText Text
ignoreCaseTokenMatch Char
c = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token ((TextToken -> Maybe Text) -> REText Text)
-> (TextToken -> Maybe Text) -> REText Text
forall a b. (a -> b) -> a -> b
$ \TextToken
tok ->
  if Char -> Char
CF.caseFoldSimple (TextToken -> Char
tChar TextToken
tok) Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c'
  then Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$! TextToken -> Text
tokenToSlice TextToken
tok
  else Maybe Text
forall a. Maybe a
Nothing
  where
    !c' :: Char
c' = Char -> Char
CF.caseFoldSimple Char
c

oneOfTokenMatch :: CharSet -> REText Text
oneOfTokenMatch :: CharSet -> REText Text
oneOfTokenMatch !CharSet
cs = (TextToken -> Maybe Text) -> REText Text
forall c a. (c -> Maybe a) -> RE c a
R.token ((TextToken -> Maybe Text) -> REText Text)
-> (TextToken -> Maybe Text) -> REText Text
forall a b. (a -> b) -> a -> b
$ \TextToken
tok ->
  if Char -> CharSet -> Bool
CS.member (TextToken -> Char
tChar TextToken
tok) CharSet
cs
  then Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$! TextToken -> Text
tokenToSlice TextToken
tok
  else Maybe Text
forall a. Maybe a
Nothing
#endif

----------------
-- Match stuff
----------------

-- | Rebuild the @RE@ such that the result is the matched @Text@ instead.
toMatch :: REText a -> REText Text
#ifdef __GLASGOW_HASKELL__
toMatch :: forall a. REText a -> REText Text
toMatch = REText a -> REText Text
forall a. REText a -> REText Text
go
  where
    go :: REText b -> REText Text
    go :: forall a. REText a -> REText Text
go REText b
re = case REText b
re of
      RToken TextToken -> Maybe b
t -> (TextToken -> Maybe b) -> REText Text
forall a. (TextToken -> Maybe a) -> REText Text
tokenMatch TextToken -> Maybe b
t
      RFmap Strictness
_ a1 -> b
_ RE TextToken a1
re1 -> RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1
      RFmap_ b
_ RE TextToken a1
re1 -> RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1
      RPure b
_ -> Text -> REText Text
forall a c. a -> RE c a
RPure Text
T.empty
      RLiftA2 Strictness
_ a1 -> a2 -> b
_ RE TextToken a1
re1 RE TextToken a2
re2 ->
        Strictness
-> (Text -> Text -> Text)
-> REText Text
-> REText Text
-> REText Text
forall a1 a2 a c.
Strictness -> (a1 -> a2 -> a) -> RE c a1 -> RE c a2 -> RE c a
RLiftA2 Strictness
Strict Text -> Text -> Text
unsafeAdjacentAppend (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1) (RE TextToken a2 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a2
re2)
      REText b
REmpty -> REText Text
forall c a. RE c a
REmpty
      RAlt REText b
re1 REText b
re2 -> REText Text -> REText Text -> REText Text
forall c a. RE c a -> RE c a -> RE c a
RAlt (REText b -> REText Text
forall a. REText a -> REText Text
go REText b
re1) (REText b -> REText Text
forall a. REText a -> REText Text
go REText b
re2)
      RMany a1 -> b
_ a2 -> b
_ a2 -> a1 -> a2
_ a2
_ RE TextToken a1
re1 ->
        Strictness
-> Greediness
-> (Text -> Text -> Text)
-> Text
-> REText Text
-> REText Text
forall a a1 c.
Strictness
-> Greediness -> (a -> a1 -> a) -> a -> RE c a1 -> RE c a
RFold Strictness
Strict Greediness
Greedy Text -> Text -> Text
unsafeAdjacentAppend Text
T.empty (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1)
      RFold Strictness
_ Greediness
gr b -> a1 -> b
_ b
_ RE TextToken a1
re1 ->
        Strictness
-> Greediness
-> (Text -> Text -> Text)
-> Text
-> REText Text
-> REText Text
forall a a1 c.
Strictness
-> Greediness -> (a -> a1 -> a) -> a -> RE c a1 -> RE c a
RFold Strictness
Strict Greediness
gr Text -> Text -> Text
unsafeAdjacentAppend Text
T.empty (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
go RE TextToken a1
re1)
#else
toMatch = fmap (T.pack . map tChar) . RL.toMatch
#endif

-- | Rebuild the @RE@ to include the matched @Text@ alongside the result.
withMatch :: REText a -> REText (Text, a)
#ifdef __GLASGOW_HASKELL__
data WithMatch a = WM {-# UNPACK #-} !Text a

instance Functor WithMatch where
  fmap :: forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmap a -> b
f (WM Text
t a
x) = Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM Text
t (a -> b
f a
x)

fmapWM' :: (a -> b) -> WithMatch a -> WithMatch b
fmapWM' :: forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a -> b
f (WM Text
t a
x) = Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM Text
t (b -> WithMatch b) -> b -> WithMatch b
forall a b. (a -> b) -> a -> b
$! a -> b
f a
x

instance Applicative WithMatch where
  pure :: forall a. a -> WithMatch a
pure = Text -> a -> WithMatch a
forall a. Text -> a -> WithMatch a
WM Text
T.empty
  liftA2 :: forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2 a -> b -> c
f (WM Text
t1 a
x) (WM Text
t2 b
y) = Text -> c -> WithMatch c
forall a. Text -> a -> WithMatch a
WM (Text -> Text -> Text
unsafeAdjacentAppend Text
t1 Text
t2) (a -> b -> c
f a
x b
y)

liftA2WM' :: (a1 -> a2 -> b) -> WithMatch a1 -> WithMatch a2 -> WithMatch b
liftA2WM' :: forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' a1 -> a2 -> b
f (WM Text
t1 a1
x) (WM Text
t2 a2
y) = Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM (Text -> Text -> Text
unsafeAdjacentAppend Text
t1 Text
t2) (b -> WithMatch b) -> b -> WithMatch b
forall a b. (a -> b) -> a -> b
$! a1 -> a2 -> b
f a1
x a2
y

withMatch :: forall a. REText a -> REText (Text, a)
withMatch = (WithMatch a -> (Text, a))
-> RE TextToken (WithMatch a) -> RE TextToken (Text, a)
forall a b c. (a -> b) -> RE c a -> RE c b
R.fmap' (\(WM Text
t a
x) -> (Text
t,a
x)) (RE TextToken (WithMatch a) -> RE TextToken (Text, a))
-> (REText a -> RE TextToken (WithMatch a))
-> REText a
-> RE TextToken (Text, a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> RE TextToken (WithMatch a)
forall b. REText b -> REText (WithMatch b)
go
  where
    go :: REText b -> REText (WithMatch b)
    go :: forall b. REText b -> REText (WithMatch b)
go REText b
re = case REText b
re of
      RToken TextToken -> Maybe b
t -> (TextToken -> Maybe b) -> REText (WithMatch b)
forall a. (TextToken -> Maybe a) -> REText (WithMatch a)
tokenWithMatch TextToken -> Maybe b
t
      RFmap Strictness
st a1 -> b
f RE TextToken a1
re1 ->
        let g :: WithMatch a1 -> WithMatch b
g = case Strictness
st of
              Strictness
Strict -> (a1 -> b) -> WithMatch a1 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a1 -> b
f
              Strictness
NonStrict -> (a1 -> b) -> WithMatch a1 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap a1 -> b
f
        in Strictness
-> (WithMatch a1 -> WithMatch b)
-> RE TextToken (WithMatch a1)
-> REText (WithMatch b)
forall a1 a c. Strictness -> (a1 -> a) -> RE c a1 -> RE c a
RFmap Strictness
Strict WithMatch a1 -> WithMatch b
g (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1)
      RFmap_ b
b RE TextToken a1
re1 -> Strictness
-> (Text -> WithMatch b) -> REText Text -> REText (WithMatch b)
forall a1 a c. Strictness -> (a1 -> a) -> RE c a1 -> RE c a
RFmap Strictness
Strict ((Text -> b -> WithMatch b) -> b -> Text -> WithMatch b
forall a b c. (a -> b -> c) -> b -> a -> c
flip Text -> b -> WithMatch b
forall a. Text -> a -> WithMatch a
WM b
b) (RE TextToken a1 -> REText Text
forall a. REText a -> REText Text
toMatch RE TextToken a1
re1)
      RPure b
b -> WithMatch b -> REText (WithMatch b)
forall a c. a -> RE c a
RPure (b -> WithMatch b
forall a. a -> WithMatch a
forall (f :: * -> *) a. Applicative f => a -> f a
pure b
b)
      RLiftA2 Strictness
st a1 -> a2 -> b
f RE TextToken a1
re1 RE TextToken a2
re2 ->
        let g :: WithMatch a1 -> WithMatch a2 -> WithMatch b
g = case Strictness
st of
              Strictness
Strict -> (a1 -> a2 -> b) -> WithMatch a1 -> WithMatch a2 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' a1 -> a2 -> b
f
              Strictness
NonStrict -> (a1 -> a2 -> b) -> WithMatch a1 -> WithMatch a2 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
Ap.liftA2 a1 -> a2 -> b
f
        in Strictness
-> (WithMatch a1 -> WithMatch a2 -> WithMatch b)
-> RE TextToken (WithMatch a1)
-> RE TextToken (WithMatch a2)
-> REText (WithMatch b)
forall a1 a2 a c.
Strictness -> (a1 -> a2 -> a) -> RE c a1 -> RE c a2 -> RE c a
RLiftA2 Strictness
Strict WithMatch a1 -> WithMatch a2 -> WithMatch b
g (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1) (RE TextToken a2 -> RE TextToken (WithMatch a2)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a2
re2)
      REText b
REmpty -> REText (WithMatch b)
forall c a. RE c a
REmpty
      RAlt REText b
re1 REText b
re2 -> REText (WithMatch b)
-> REText (WithMatch b) -> REText (WithMatch b)
forall c a. RE c a -> RE c a -> RE c a
RAlt (REText b -> REText (WithMatch b)
forall b. REText b -> REText (WithMatch b)
go REText b
re1) (REText b -> REText (WithMatch b)
forall b. REText b -> REText (WithMatch b)
go REText b
re2)
      RMany a1 -> b
f1 a2 -> b
f2 a2 -> a1 -> a2
f a2
z RE TextToken a1
re1 ->
        (WithMatch a1 -> WithMatch b)
-> (WithMatch a2 -> WithMatch b)
-> (WithMatch a2 -> WithMatch a1 -> WithMatch a2)
-> WithMatch a2
-> RE TextToken (WithMatch a1)
-> REText (WithMatch b)
forall a1 a a2 c.
(a1 -> a)
-> (a2 -> a) -> (a2 -> a1 -> a2) -> a2 -> RE c a1 -> RE c a
RMany ((a1 -> b) -> WithMatch a1 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a1 -> b
f1) ((a2 -> b) -> WithMatch a2 -> WithMatch b
forall a b. (a -> b) -> WithMatch a -> WithMatch b
fmapWM' a2 -> b
f2) ((a2 -> a1 -> a2) -> WithMatch a2 -> WithMatch a1 -> WithMatch a2
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' a2 -> a1 -> a2
f) (a2 -> WithMatch a2
forall a. a -> WithMatch a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a2
z) (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1)
      RFold Strictness
st Greediness
gr b -> a1 -> b
f b
z RE TextToken a1
re1 ->
        let g :: WithMatch b -> WithMatch a1 -> WithMatch b
g = case Strictness
st of
              Strictness
Strict -> (b -> a1 -> b) -> WithMatch b -> WithMatch a1 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
liftA2WM' b -> a1 -> b
f
              Strictness
NonStrict -> (b -> a1 -> b) -> WithMatch b -> WithMatch a1 -> WithMatch b
forall a b c.
(a -> b -> c) -> WithMatch a -> WithMatch b -> WithMatch c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
Ap.liftA2 b -> a1 -> b
f
        in Strictness
-> Greediness
-> (WithMatch b -> WithMatch a1 -> WithMatch b)
-> WithMatch b
-> RE TextToken (WithMatch a1)
-> REText (WithMatch b)
forall a a1 c.
Strictness
-> Greediness -> (a -> a1 -> a) -> a -> RE c a1 -> RE c a
RFold Strictness
Strict Greediness
gr WithMatch b -> WithMatch a1 -> WithMatch b
g (b -> WithMatch b
forall a. a -> WithMatch a
forall (f :: * -> *) a. Applicative f => a -> f a
pure b
z) (RE TextToken a1 -> RE TextToken (WithMatch a1)
forall b. REText b -> REText (WithMatch b)
go RE TextToken a1
re1)
#else
withMatch = fmap (\(toks, x) -> (T.pack (map tChar toks), x)) . RL.withMatch
#endif

----------
-- Parse
----------

textTokenFoldr :: (TextToken -> b -> b) -> b -> Text -> b
#ifdef __GLASGOW_HASKELL__
textTokenFoldr :: forall b. (TextToken -> b -> b) -> b -> Text -> b
textTokenFoldr TextToken -> b -> b
f b
z (TInternal.Text Array
a Int
o0 Int
l) = Int -> b
loop Int
o0
  where
    loop :: Int -> b
loop Int
o | Int
o Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
o0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l = b
z
    loop Int
o = case Array -> Int -> Iter
TUnsafe.iterArray Array
a Int
o of
      TUnsafe.Iter Char
c Int
clen -> TextToken -> b -> b
f (Array -> Int -> Char -> TextToken
TextToken Array
a Int
o Char
c) (Int -> b
loop (Int
o Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
clen))
{-# INLINE textTokenFoldr #-}
#else
textTokenFoldr f = T.foldr (f . TextToken)
#endif

-- | \(O(mn \log m)\). Parse a @Text@ with a @REText@.
--
-- Parses the entire @Text@, not just a prefix or a substring.
--
-- Uses 'Regex.Text.compile', see the note there.
--
-- If parsing multiple @Text@s using the same @RE@, it is wasteful to compile
-- the @RE@ every time. So, prefer to
--
-- * Compile once with 'Regex.Text.compile' or 'Regex.Text.compileBounded' and
--   use the compiled 'ParserText'  with 'parse' as many times as required.
-- * Alternately, partially apply this function to a @RE@ and use the function
--   as many times as required.
reParse :: REText a -> Text -> Maybe a
reParse :: forall a. REText a -> Text -> Maybe a
reParse REText a
re = let !p :: Parser TextToken a
p = REText a -> Parser TextToken a
forall c a. RE c a -> Parser c a
P.compile REText a
re in Parser TextToken a -> Text -> Maybe a
forall a. ParserText a -> Text -> Maybe a
parse Parser TextToken a
p
{-# INLINE reParse #-}

-- | \(O(mn \log m)\). Parse a @Text@ with a @ParserText@.
--
-- Parses the entire @Text@, not just a prefix or a substring.
parse :: ParserText a -> Text -> Maybe a
parse :: forall a. ParserText a -> Text -> Maybe a
parse = (forall b. (TextToken -> b -> b) -> b -> Text -> b)
-> Parser TextToken a -> Text -> Maybe a
forall f c a. Foldr f c -> Parser c a -> f -> Maybe a
P.parseFoldr (TextToken -> b -> b) -> b -> Text -> b
forall b. (TextToken -> b -> b) -> b -> Text -> b
textTokenFoldr

-- | \(O(mn \log m)\). Parse a @Text@ with a @ParserText@. Calls 'error' on
-- parse failure.
--
-- For use with parsers that are known to never fail.
--
-- Parses the entire @Text@, not just a prefix or a substring.
parseSure :: ParserText a -> Text -> a
parseSure :: forall a. ParserText a -> Text -> a
parseSure ParserText a
p = a -> Maybe a -> a
forall a. a -> Maybe a -> a
fromMaybe a
forall a. a
parseSureError (Maybe a -> a) -> (Text -> Maybe a) -> Text -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParserText a -> Text -> Maybe a
forall a. ParserText a -> Text -> Maybe a
parse ParserText a
p

parseSureError :: a
parseSureError :: forall a. a
parseSureError = [Char] -> a
forall a. HasCallStack => [Char] -> a
error
  [Char]
"Regex.Text.parseSure: parse failed; if parsing can fail use 'parse' instead"

reParseSure :: REText a -> Text -> a
reParseSure :: forall a. REText a -> Text -> a
reParseSure REText a
re = a -> Maybe a -> a
forall a. a -> Maybe a -> a
fromMaybe a
forall a. a
parseSureError (Maybe a -> a) -> (Text -> Maybe a) -> Text -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> Text -> Maybe a
forall a. REText a -> Text -> Maybe a
reParse REText a
re
{-# INLINE reParseSure #-}

-- | \(O(mn \log m)\). Find the first occurence of the given @RE@ in a @Text@.
--
-- ==== __Examples__
--
-- >>> find (text "meow") "homeowner"
-- Just "meow"
--
-- To test whether a @Text@ is present in another @Text@, like above, prefer
-- @Data.Text.'T.isInfixOf'@.
--
-- >>> find (textIgnoreCase "haskell") "Look I'm Haskelling!"
-- Just "Haskell"
-- >>> find (text "backtracking") "parser-regex"
-- Nothing
--
find :: REText a -> Text -> Maybe a
find :: forall a. REText a -> Text -> Maybe a
find = REText a -> Text -> Maybe a
forall a. REText a -> Text -> Maybe a
reParse (REText a -> Text -> Maybe a)
-> (REText a -> REText a) -> REText a -> Text -> Maybe a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> REText a
forall c a. RE c a -> RE c a
R.toFind
{-# INLINE find #-}

-- | \(O(mn \log m)\). Find all non-overlapping occurences of the given @RE@ in
-- the @Text@.
--
-- ==== __Examples__
--
-- >>> findAll (text "ana") "banananana"
-- ["ana","ana"]
--
-- @
-- data Roll = Roll
--   Natural -- ^ Rolls
--   Natural -- ^ Faces on the die
--   deriving Show
--
-- roll :: REText Roll
-- roll = Roll \<$> ('naturalDec' \<|> pure 1) \<* 'char' \'d\' \<*> naturalDec
-- @
--
-- >>> findAll roll "3d6, d10, 2d10"
-- [Roll 3 6,Roll 1 10,Roll 2 10]
--
findAll :: REText a -> Text -> [a]
findAll :: forall a. REText a -> Text -> [a]
findAll = REText [a] -> Text -> [a]
forall a. REText a -> Text -> a
reParseSure (REText [a] -> Text -> [a])
-> (REText a -> REText [a]) -> REText a -> Text -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> REText [a]
forall c a. RE c a -> RE c [a]
R.toFindMany
{-# INLINE findAll #-}

-- | \(O(mn \log m)\). Split a @Text@ at occurences of the given @RE@.
--
-- ==== __Examples__
--
-- >>> splitOn (char ' ') "Glasses are really versatile"
-- ["Glasses","are","really","versatile"]
--
-- For simple splitting, like above, prefer @Data.Text.'Data.Text.words'@,
-- @Data.Text.'Data.Text.lines'@, @Data.Text.'Data.Text.split'@ or
-- @Data.Text.'Data.Text.splitOn'@, whichever is applicable.
--
-- >>> splitOn (char ' ' *> oneOf "+-=" *> char ' ') "3 - 1 + 1/2 - 2 = 0"
-- ["3","1","1/2","2","0"]
--
-- If the @Text@ starts or ends with a delimiter, the result will contain
-- empty @Text@s at those positions.
--
-- >>> splitOn (char 'a') "ayaya"
-- ["","y","y",""]
--
splitOn :: REText a -> Text -> [Text]
splitOn :: forall a. REText a -> Text -> [Text]
splitOn = REText [Text] -> Text -> [Text]
forall a. REText a -> Text -> a
reParseSure (REText [Text] -> Text -> [Text])
-> (REText a -> REText [Text]) -> REText a -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText a -> REText [Text]
forall a. REText a -> REText [Text]
toSplitOn
{-# INLINE splitOn #-}

toSplitOn :: REText a -> REText [Text]
toSplitOn :: forall a. REText a -> REText [Text]
toSplitOn REText a
re = REText Text
manyTextMin REText Text -> REText a -> REText [Text]
forall c a sep. RE c a -> RE c sep -> RE c [a]
`R.sepBy` REText a
re

-- | \(O(mn \log m)\). Replace the first match of the given @RE@ with its
-- result. If there is no match, the result is @Nothing@.
--
-- ==== __Examples__
--
-- >>> replace ("world" <$ text "Haskell") "Hello, Haskell!"
-- Just "Hello, world!"
--
-- >>> replace ("," <$ some (char '.')) "one...two...ten"
-- Just "one,two...ten"
--
replace :: REText Text -> Text -> Maybe Text
replace :: REText Text -> Text -> Maybe Text
replace = REText Text -> Text -> Maybe Text
forall a. REText a -> Text -> Maybe a
reParse (REText Text -> Text -> Maybe Text)
-> (REText Text -> REText Text)
-> REText Text
-> Text
-> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText Text -> REText Text
toReplace
{-# INLINE replace #-}

toReplace :: REText Text -> REText Text
toReplace :: REText Text -> REText Text
toReplace REText Text
re = (Text -> Text -> Text -> Text)
-> REText Text -> REText Text -> RE TextToken (Text -> Text)
forall a b c.
(a -> b -> c) -> RE TextToken a -> RE TextToken b -> RE TextToken c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
Ap.liftA2 Text -> Text -> Text -> Text
f REText Text
manyTextMin REText Text
re RE TextToken (Text -> Text) -> REText Text -> REText Text
forall a b.
RE TextToken (a -> b) -> RE TextToken a -> RE TextToken b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> REText Text
manyText
  where
#ifdef __GLASGOW_HASKELL__
    f :: Text -> Text -> Text -> Text
f Text
a Text
b Text
c = [Text] -> Text
reverseConcat [Text
c,Text
b,Text
a]
#else
    f a b c = T.concat [a,b,c]
#endif

-- | \(O(mn \log m)\). Replace all non-overlapping matches of the given @RE@
-- with their results.
--
-- ==== __Examples__
--
-- >>> replaceAll (" and " <$ text ", ") "red, blue, green"
-- "red and blue and green"
--
-- For simple replacements like above, prefer @Data.Text.'Data.Text.replace'@.
--
-- >>> replaceAll ("Fruit" <$ text "Time" <|> "a banana" <$ text "an arrow") "Time flies like an arrow"
-- "Fruit flies like a banana"
--
-- @
-- sep = 'oneOf' "-./"
-- digits n = 'toMatch' ('Control.Monad.replicateM_' n (oneOf 'Data.CharSet.digit'))
-- toYmd d m y = mconcat [y, \"-\", m, \"-\", d]
-- date = toYmd \<$> digits 2 \<* sep
--              \<*> digits 2 \<* sep
--              \<*> digits 4
-- @
-- >>> replaceAll date "01/01/1970, 01-04-1990, 03.07.2011"
-- "1970-01-01, 1990-04-01, 2011-07-03"
--
replaceAll :: REText Text -> Text -> Text
replaceAll :: REText Text -> Text -> Text
replaceAll = REText Text -> Text -> Text
forall a. REText a -> Text -> a
reParseSure (REText Text -> Text -> Text)
-> (REText Text -> REText Text) -> REText Text -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. REText Text -> REText Text
toReplaceMany
{-# INLINE replaceAll #-}

toReplaceMany :: REText Text -> REText Text
toReplaceMany :: REText Text -> REText Text
toReplaceMany REText Text
re =
#ifdef __GLASGOW_HASKELL__
  [Text] -> Text
reverseConcat ([Text] -> Text) -> REText [Text] -> REText Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ([Text] -> Text -> [Text])
-> [Text] -> REText Text -> REText [Text]
forall b a c. (b -> a -> b) -> b -> RE c a -> RE c b
R.foldlMany' ((Text -> [Text] -> [Text]) -> [Text] -> Text -> [Text]
forall a b c. (a -> b -> c) -> b -> a -> c
flip (:)) [] (REText Text
re REText Text -> REText Text -> REText Text
forall a. RE TextToken a -> RE TextToken a -> RE TextToken a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> REText Text
anyTokenMatch)
#else
  T.concat <$> many (re <|> T.singleton <$> anyChar)
#endif

#ifdef __GLASGOW_HASKELL__
-------------------------
-- Low level Text stuff
-------------------------

-- WARNING: If t1 and t2 are not empty, they must be adjacent slices of the
-- same Text. In other words, sameByteArray# a1 _a2 && o1 + l1 == _o2.
unsafeAdjacentAppend :: Text -> Text -> Text
unsafeAdjacentAppend :: Text -> Text -> Text
unsafeAdjacentAppend t1 :: Text
t1@(TInternal.Text Array
a1 Int
o1 Int
l1) t2 :: Text
t2@(TInternal.Text Array
_a2 Int
_o2 Int
l2)
  | Text -> Bool
T.null Text
t1 = Text
t2
  | Text -> Bool
T.null Text
t2 = Text
t1
  | Bool
otherwise = Array -> Int -> Int -> Text
TInternal.Text Array
a1 Int
o1 (Int
l1Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l2)

-- reverseConcat = T.concat . reverse
reverseConcat :: [Text] -> Text
reverseConcat :: [Text] -> Text
reverseConcat [Text]
ts = case [Text]
ts of
  [] -> Text
T.empty
  [Text
t] -> Text
t
  [Text]
_ | Int
len Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> Text
T.empty
    | Bool
otherwise -> Array -> Int -> Int -> Text
TInternal.Text Array
arr Int
0 Int
len
  where
    flen :: Int -> Text -> Int
flen Int
acc (TInternal.Text Array
_ Int
_ Int
l)
      | Int
acc' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = Int
acc'
      | Bool
otherwise = Int
forall a. a
reverseConcatOverflowError
      where
        acc' :: Int
acc' = Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l
    len :: Int
len = (Int -> Text -> Int) -> Int -> [Text] -> Int
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
F.foldl' Int -> Text -> Int
flen Int
0 [Text]
ts
    arr :: Array
arr = (forall s. ST s (MArray s)) -> Array
TArray.run ((forall s. ST s (MArray s)) -> Array)
-> (forall s. ST s (MArray s)) -> Array
forall a b. (a -> b) -> a -> b
$ do
      MArray s
marr <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
TArray.new Int
len
      let loop :: Int -> [Text] -> ST s (MArray s)
loop !Int
_ [] = MArray s -> ST s (MArray s)
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure MArray s
marr
          loop Int
i (TInternal.Text Array
a Int
o Int
l : [Text]
ts') =
            Int -> MArray s -> Int -> Array -> Int -> ST s ()
forall s. Int -> MArray s -> Int -> Array -> Int -> ST s ()
TArray.copyI Int
l MArray s
marr (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l) Array
a Int
o ST s () -> ST s (MArray s) -> ST s (MArray s)
forall a b. ST s a -> ST s b -> ST s b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> [Text] -> ST s (MArray s)
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l) [Text]
ts'
      Int -> [Text] -> ST s (MArray s)
loop Int
len [Text]
ts

reverseConcatOverflowError :: a
reverseConcatOverflowError :: forall a. a
reverseConcatOverflowError =
  [Char] -> a
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Regex.Text.reverseConcat: size overflow"
#endif

----------
-- Notes
----------

-- Note [Why simple case fold]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Unicode defines two different ways to case fold, "simple" and "full". Full is
-- superior to simple, and capable of folding more pairs of texts to the same
-- text. This is what is used by Data.Text.toCaseFold.
--
-- However, full maps a Char to one or more Chars, for instance "ß" maps to
-- "ss". Since we operate on one Char at a time without backtracking, we must
-- have branching in our regex corresponding to possible texts that case fold to
-- a target text. For instance, to match "sssss" with full case fold given the
-- above mapping, possible inputs are
--
-- sssss, sssß, ssßs, sßss, ßsss, sßß, ßsß, ßßs
--
-- Fun fact: the number of strings that match "s"*n is Fibonacci(n+1).
-- Of course, we can't have textIgnoreCase take a text and explode into a regex
-- of exponential size.
--
-- So, we restrict ourselves to simple case folding. Simple case folding
-- maps a single Char to a single Char. And it's easy to test that the required
-- Char and a Char in the input case fold to the same Char.
--
-- Note that charIgnoreCase could possibly use full case folding. Only a small
-- number of texts would case fold to the case fold of a single Char. But we
-- stick with simple case fold for consistency.