-- SPDX-FileCopyrightText: Copyright (c) 2025 Objectionary.com
-- SPDX-License-Identifier: MIT

module Regexp where

import Control.Exception
import Data.Array (bounds, (!))
import qualified Data.ByteString.Char8 as B
import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import qualified Text.Regex.PCRE.ByteString as R

compile :: B.ByteString -> IO R.Regex
compile :: ByteString -> IO Regex
compile ByteString
pat = do
  Either (MatchOffset, String) Regex
compiled <- CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
R.compile CompOption
R.compBlank ExecOption
R.execBlank ByteString
pat
  case Either (MatchOffset, String) Regex
compiled of
    Left (MatchOffset
_, String
err) -> IOError -> IO Regex
forall e a. Exception e => e -> IO a
throwIO (String -> IOError
userError (String
"Regex compilation failed: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
err))
    Right Regex
regex -> Regex -> IO Regex
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Regex
regex

match :: B.ByteString -> B.ByteString -> IO Bool
match :: ByteString -> ByteString -> IO Bool
match ByteString
pattern ByteString
input = do
  Regex
regex <- ByteString -> IO Regex
compile ByteString
pattern
  Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
res <- Regex
-> ByteString
-> IO
     (Either
        WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
input
  case Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
res of
    Left WrapError
_ -> Bool -> IO Bool
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
    Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> Bool -> IO Bool
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
    Right (Just Array MatchOffset (MatchOffset, MatchOffset)
_) -> Bool -> IO Bool
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True

extractGroups :: R.Regex -> B.ByteString -> IO [B.ByteString]
extractGroups :: Regex -> ByteString -> IO [ByteString]
extractGroups Regex
regex ByteString
input = do
  Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result <- Regex
-> ByteString
-> IO
     (Either
        WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
input
  case Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result of
    Left WrapError
_ -> [ByteString] -> IO [ByteString]
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
    Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> [ByteString] -> IO [ByteString]
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
    Right (Just Array MatchOffset (MatchOffset, MatchOffset)
arr) ->
      let (MatchOffset
start, MatchOffset
end) = Array MatchOffset (MatchOffset, MatchOffset)
-> (MatchOffset, MatchOffset)
forall i e. Array i e -> (i, i)
bounds Array MatchOffset (MatchOffset, MatchOffset)
arr
          groups :: [ByteString]
groups =
            [ let (MatchOffset
off, MatchOffset
len) = Array MatchOffset (MatchOffset, MatchOffset)
arr Array MatchOffset (MatchOffset, MatchOffset)
-> MatchOffset -> (MatchOffset, MatchOffset)
forall i e. Ix i => Array i e -> i -> e
! MatchOffset
i
               in if MatchOffset
off MatchOffset -> MatchOffset -> Bool
forall a. Eq a => a -> a -> Bool
== -MatchOffset
1 then ByteString
B.empty else MatchOffset -> ByteString -> ByteString
B.take MatchOffset
len (MatchOffset -> ByteString -> ByteString
B.drop MatchOffset
off ByteString
input)
              | MatchOffset
i <- [MatchOffset
start .. MatchOffset
end]
            ]
       in [ByteString] -> IO [ByteString]
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [ByteString]
groups

substituteGroups :: B.ByteString -> [B.ByteString] -> B.ByteString
substituteGroups :: ByteString -> [ByteString] -> ByteString
substituteGroups ByteString
rep [ByteString]
groups = [ByteString] -> ByteString
B.concat (String -> [ByteString]
go (ByteString -> String
B.unpack ByteString
rep))
  where
    go :: String -> [ByteString]
go [] = []
    go (Char
'$' : String
rest) =
      let (String
digits, String
afterDigits) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
rest
       in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digits
            then Char -> ByteString
B.singleton Char
'$' ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: String -> [ByteString]
go String
rest
            else
              let idx :: MatchOffset
idx = String -> MatchOffset
forall a. Read a => String -> a
read String
digits
                  val :: ByteString
val = ByteString -> Maybe ByteString -> ByteString
forall a. a -> Maybe a -> a
fromMaybe (String -> ByteString
B.pack (Char
'$' Char -> String -> String
forall a. a -> [a] -> [a]
: String
digits)) (MatchOffset -> [ByteString] -> Maybe ByteString
forall {a}. MatchOffset -> [a] -> Maybe a
safeIndex MatchOffset
idx [ByteString]
groups)
               in ByteString
val ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: String -> [ByteString]
go String
afterDigits
    go (Char
c : String
rest) = Char -> ByteString
B.singleton Char
c ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: String -> [ByteString]
go String
rest
    safeIndex :: MatchOffset -> [a] -> Maybe a
safeIndex MatchOffset
i [a]
xs
      | MatchOffset
i MatchOffset -> MatchOffset -> Bool
forall a. Ord a => a -> a -> Bool
>= MatchOffset
0 Bool -> Bool -> Bool
&& MatchOffset
i MatchOffset -> MatchOffset -> Bool
forall a. Ord a => a -> a -> Bool
< [a] -> MatchOffset
forall a. [a] -> MatchOffset
forall (t :: * -> *) a. Foldable t => t a -> MatchOffset
length [a]
xs = a -> Maybe a
forall a. a -> Maybe a
Just ([a]
xs [a] -> MatchOffset -> a
forall a. HasCallStack => [a] -> MatchOffset -> a
!! MatchOffset
i)
      | Bool
otherwise = Maybe a
forall a. Maybe a
Nothing

replaceFirst :: R.Regex -> B.ByteString -> B.ByteString -> IO B.ByteString
replaceFirst :: Regex -> ByteString -> ByteString -> IO ByteString
replaceFirst Regex
regex ByteString
rep ByteString
input = do
  Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result <- Regex
-> ByteString
-> IO
     (Either
        WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
input
  case Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result of
    Left WrapError
_ -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
input
    Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
input
    Right (Just Array MatchOffset (MatchOffset, MatchOffset)
arr) -> do
      [ByteString]
groups <- Regex -> ByteString -> IO [ByteString]
extractGroups Regex
regex ByteString
input
      let (MatchOffset
off, MatchOffset
len) = Array MatchOffset (MatchOffset, MatchOffset)
arr Array MatchOffset (MatchOffset, MatchOffset)
-> MatchOffset -> (MatchOffset, MatchOffset)
forall i e. Ix i => Array i e -> i -> e
! MatchOffset
0
          (ByteString
before, ByteString
rest) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
off ByteString
input
          (ByteString
_, ByteString
after) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
len ByteString
rest
          replacement :: ByteString
replacement = ByteString -> [ByteString] -> ByteString
substituteGroups ByteString
rep [ByteString]
groups
      ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
B.concat [ByteString
before, ByteString
replacement, ByteString
after]

replaceAll :: R.Regex -> B.ByteString -> B.ByteString -> IO B.ByteString
replaceAll :: Regex -> ByteString -> ByteString -> IO ByteString
replaceAll Regex
regex ByteString
rep ByteString
input = ByteString -> ByteString -> IO ByteString
go ByteString
input ByteString
B.empty
  where
    go :: ByteString -> ByteString -> IO ByteString
go ByteString
bs ByteString
acc = do
      Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result <- Regex
-> ByteString
-> IO
     (Either
        WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
bs
      case Either
  WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result of
        Left WrapError
_ -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString
B.append ByteString
acc ByteString
bs
        Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString
B.append ByteString
acc ByteString
bs
        Right (Just Array MatchOffset (MatchOffset, MatchOffset)
arr) -> do
          let (MatchOffset
off, MatchOffset
len) = Array MatchOffset (MatchOffset, MatchOffset)
arr Array MatchOffset (MatchOffset, MatchOffset)
-> MatchOffset -> (MatchOffset, MatchOffset)
forall i e. Ix i => Array i e -> i -> e
! MatchOffset
0
              (ByteString
before, ByteString
rest1) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
off ByteString
bs
              (ByteString
_, ByteString
rest2) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
len ByteString
rest1
          [ByteString]
groups <- Regex -> ByteString -> IO [ByteString]
extractGroups Regex
regex ByteString
bs
          let replacement :: ByteString
replacement = ByteString -> [ByteString] -> ByteString
substituteGroups ByteString
rep [ByteString]
groups
          ByteString -> ByteString -> IO ByteString
go ByteString
rest2 ([ByteString] -> ByteString
B.concat [ByteString
acc, ByteString
before, ByteString
replacement])