module Regexp where
import Control.Exception
import Data.Array (bounds, (!))
import qualified Data.ByteString.Char8 as B
import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import qualified Text.Regex.PCRE.ByteString as R
compile :: B.ByteString -> IO R.Regex
compile :: ByteString -> IO Regex
compile ByteString
pat = do
Either (MatchOffset, String) Regex
compiled <- CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
R.compile CompOption
R.compBlank ExecOption
R.execBlank ByteString
pat
case Either (MatchOffset, String) Regex
compiled of
Left (MatchOffset
_, String
err) -> IOError -> IO Regex
forall e a. Exception e => e -> IO a
throwIO (String -> IOError
userError (String
"Regex compilation failed: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
err))
Right Regex
regex -> Regex -> IO Regex
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Regex
regex
match :: B.ByteString -> B.ByteString -> IO Bool
match :: ByteString -> ByteString -> IO Bool
match ByteString
pattern ByteString
input = do
Regex
regex <- ByteString -> IO Regex
compile ByteString
pattern
Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
res <- Regex
-> ByteString
-> IO
(Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
input
case Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
res of
Left WrapError
_ -> Bool -> IO Bool
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> Bool -> IO Bool
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
Right (Just Array MatchOffset (MatchOffset, MatchOffset)
_) -> Bool -> IO Bool
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
extractGroups :: R.Regex -> B.ByteString -> IO [B.ByteString]
Regex
regex ByteString
input = do
Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result <- Regex
-> ByteString
-> IO
(Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
input
case Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result of
Left WrapError
_ -> [ByteString] -> IO [ByteString]
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> [ByteString] -> IO [ByteString]
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
Right (Just Array MatchOffset (MatchOffset, MatchOffset)
arr) ->
let (MatchOffset
start, MatchOffset
end) = Array MatchOffset (MatchOffset, MatchOffset)
-> (MatchOffset, MatchOffset)
forall i e. Array i e -> (i, i)
bounds Array MatchOffset (MatchOffset, MatchOffset)
arr
groups :: [ByteString]
groups =
[ let (MatchOffset
off, MatchOffset
len) = Array MatchOffset (MatchOffset, MatchOffset)
arr Array MatchOffset (MatchOffset, MatchOffset)
-> MatchOffset -> (MatchOffset, MatchOffset)
forall i e. Ix i => Array i e -> i -> e
! MatchOffset
i
in if MatchOffset
off MatchOffset -> MatchOffset -> Bool
forall a. Eq a => a -> a -> Bool
== -MatchOffset
1 then ByteString
B.empty else MatchOffset -> ByteString -> ByteString
B.take MatchOffset
len (MatchOffset -> ByteString -> ByteString
B.drop MatchOffset
off ByteString
input)
| MatchOffset
i <- [MatchOffset
start .. MatchOffset
end]
]
in [ByteString] -> IO [ByteString]
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [ByteString]
groups
substituteGroups :: B.ByteString -> [B.ByteString] -> B.ByteString
substituteGroups :: ByteString -> [ByteString] -> ByteString
substituteGroups ByteString
rep [ByteString]
groups = [ByteString] -> ByteString
B.concat (String -> [ByteString]
go (ByteString -> String
B.unpack ByteString
rep))
where
go :: String -> [ByteString]
go [] = []
go (Char
'$' : String
rest) =
let (String
digits, String
afterDigits) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
rest
in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digits
then Char -> ByteString
B.singleton Char
'$' ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: String -> [ByteString]
go String
rest
else
let idx :: MatchOffset
idx = String -> MatchOffset
forall a. Read a => String -> a
read String
digits
val :: ByteString
val = ByteString -> Maybe ByteString -> ByteString
forall a. a -> Maybe a -> a
fromMaybe (String -> ByteString
B.pack (Char
'$' Char -> String -> String
forall a. a -> [a] -> [a]
: String
digits)) (MatchOffset -> [ByteString] -> Maybe ByteString
forall {a}. MatchOffset -> [a] -> Maybe a
safeIndex MatchOffset
idx [ByteString]
groups)
in ByteString
val ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: String -> [ByteString]
go String
afterDigits
go (Char
c : String
rest) = Char -> ByteString
B.singleton Char
c ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: String -> [ByteString]
go String
rest
safeIndex :: MatchOffset -> [a] -> Maybe a
safeIndex MatchOffset
i [a]
xs
| MatchOffset
i MatchOffset -> MatchOffset -> Bool
forall a. Ord a => a -> a -> Bool
>= MatchOffset
0 Bool -> Bool -> Bool
&& MatchOffset
i MatchOffset -> MatchOffset -> Bool
forall a. Ord a => a -> a -> Bool
< [a] -> MatchOffset
forall a. [a] -> MatchOffset
forall (t :: * -> *) a. Foldable t => t a -> MatchOffset
length [a]
xs = a -> Maybe a
forall a. a -> Maybe a
Just ([a]
xs [a] -> MatchOffset -> a
forall a. HasCallStack => [a] -> MatchOffset -> a
!! MatchOffset
i)
| Bool
otherwise = Maybe a
forall a. Maybe a
Nothing
replaceFirst :: R.Regex -> B.ByteString -> B.ByteString -> IO B.ByteString
replaceFirst :: Regex -> ByteString -> ByteString -> IO ByteString
replaceFirst Regex
regex ByteString
rep ByteString
input = do
Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result <- Regex
-> ByteString
-> IO
(Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
input
case Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result of
Left WrapError
_ -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
input
Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
input
Right (Just Array MatchOffset (MatchOffset, MatchOffset)
arr) -> do
[ByteString]
groups <- Regex -> ByteString -> IO [ByteString]
extractGroups Regex
regex ByteString
input
let (MatchOffset
off, MatchOffset
len) = Array MatchOffset (MatchOffset, MatchOffset)
arr Array MatchOffset (MatchOffset, MatchOffset)
-> MatchOffset -> (MatchOffset, MatchOffset)
forall i e. Ix i => Array i e -> i -> e
! MatchOffset
0
(ByteString
before, ByteString
rest) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
off ByteString
input
(ByteString
_, ByteString
after) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
len ByteString
rest
replacement :: ByteString
replacement = ByteString -> [ByteString] -> ByteString
substituteGroups ByteString
rep [ByteString]
groups
ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
B.concat [ByteString
before, ByteString
replacement, ByteString
after]
replaceAll :: R.Regex -> B.ByteString -> B.ByteString -> IO B.ByteString
replaceAll :: Regex -> ByteString -> ByteString -> IO ByteString
replaceAll Regex
regex ByteString
rep ByteString
input = ByteString -> ByteString -> IO ByteString
go ByteString
input ByteString
B.empty
where
go :: ByteString -> ByteString -> IO ByteString
go ByteString
bs ByteString
acc = do
Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result <- Regex
-> ByteString
-> IO
(Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
R.execute Regex
regex ByteString
bs
case Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
result of
Left WrapError
_ -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString
B.append ByteString
acc ByteString
bs
Right Maybe (Array MatchOffset (MatchOffset, MatchOffset))
Nothing -> ByteString -> IO ByteString
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> IO ByteString) -> ByteString -> IO ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString
B.append ByteString
acc ByteString
bs
Right (Just Array MatchOffset (MatchOffset, MatchOffset)
arr) -> do
let (MatchOffset
off, MatchOffset
len) = Array MatchOffset (MatchOffset, MatchOffset)
arr Array MatchOffset (MatchOffset, MatchOffset)
-> MatchOffset -> (MatchOffset, MatchOffset)
forall i e. Ix i => Array i e -> i -> e
! MatchOffset
0
(ByteString
before, ByteString
rest1) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
off ByteString
bs
(ByteString
_, ByteString
rest2) = MatchOffset -> ByteString -> (ByteString, ByteString)
B.splitAt MatchOffset
len ByteString
rest1
[ByteString]
groups <- Regex -> ByteString -> IO [ByteString]
extractGroups Regex
regex ByteString
bs
let replacement :: ByteString
replacement = ByteString -> [ByteString] -> ByteString
substituteGroups ByteString
rep [ByteString]
groups
ByteString -> ByteString -> IO ByteString
go ByteString
rest2 ([ByteString] -> ByteString
B.concat [ByteString
acc, ByteString
before, ByteString
replacement])