-- | This is a POSIX version of parseRegex that allows NUL characters.
-- Lazy\/Possessive\/Backrefs are not recognized.  Anchors \^ and \$ are
-- recognized.
--
-- A 'PGroup' returned always has @(Maybe 'GroupIndex')@ set to @(Just _)@
-- and never to @Nothing@.

module Text.Regex.TDFA.ReadRegex (parseRegex) where

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

import Text.Regex.TDFA.Pattern {- all -}
import Text.ParserCombinators.Parsec((<|>), (<?>),
  try, runParser, many, getState, setState, CharParser, ParseError,
  sepBy1, option, notFollowedBy, many1, lookAhead, eof, between,
  string, noneOf, digit, char, anyChar)

import Control.Monad (liftM, guard)

import Data.Foldable (asum)
import qualified Data.Set as Set(fromList)

-- | An element inside @[...]@, denoting a character class.
data BracketElement
  = BEChar  Char       -- ^ A single character.
  | BERange Char Char  -- ^ A character range (e.g. @a-z@).
  | BEColl  String     -- ^ @foo@ in @[.foo.]@.
  | BEEquiv String     -- ^ @bar@ in @[=bar=]@.
  | BEClass String     -- ^ A POSIX character class (candidate), e.g. @alpha@ parsed from @[:alpha:]@.

-- | Return either an error message or a tuple of the Pattern and the
-- largest group index and the largest DoPa index (both have smallest
-- index of 1).  Since the regular expression is supplied as [Char] it
-- automatically supports unicode and @\\NUL@ characters.
parseRegex :: String -> Either ParseError (Pattern,(GroupIndex,DoPa))
parseRegex :: [Char] -> Either ParseError (Pattern, (GroupIndex, DoPa))
parseRegex [Char]
x = GenParser
  Char (GroupIndex, GroupIndex) (Pattern, (GroupIndex, DoPa))
-> (GroupIndex, GroupIndex)
-> [Char]
-> [Char]
-> Either ParseError (Pattern, (GroupIndex, DoPa))
forall tok st a.
GenParser tok st a -> st -> [Char] -> [tok] -> Either ParseError a
runParser (do pat <- P Pattern
p_regex
                             eof
                             (lastGroupIndex,lastDopa) <- getState
                             return (pat,(lastGroupIndex,DoPa lastDopa))) (GroupIndex
0,GroupIndex
0) [Char]
x [Char]
x

type P = CharParser (GroupIndex, Int)

p_regex :: P Pattern
p_regex :: P Pattern
p_regex = ([Pattern] -> Pattern)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Pattern] -> Pattern
POr (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
 -> P Pattern)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall a b. (a -> b) -> a -> b
$ P Pattern
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
sepBy1 P Pattern
p_branch (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'|')

-- man re_format helps a lot, it says one-or-more pieces so this is
-- many1 not many.  Use "()" to indicate an empty piece.
p_branch :: P Pattern
p_branch :: P Pattern
p_branch = ([Pattern] -> Pattern)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Pattern] -> Pattern
PConcat (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
 -> P Pattern)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
-> P Pattern
forall a b. (a -> b) -> a -> b
$ P Pattern
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Pattern]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 P Pattern
p_piece

p_piece :: P Pattern
p_piece :: P Pattern
p_piece = (P Pattern
p_anchor P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_atom) P Pattern -> (Pattern -> P Pattern) -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> (a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity b)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
p_post_atom -- correct specification

p_atom :: P Pattern
p_atom :: P Pattern
p_atom =  P Pattern
p_group P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_bracket P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_char P Pattern -> [Char] -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"an atom"

group_index :: P (Maybe GroupIndex)
group_index :: P (Maybe GroupIndex)
group_index = do
  (gi,ci) <- ParsecT
  [Char] (GroupIndex, GroupIndex) Identity (GroupIndex, GroupIndex)
forall (m :: * -> *) s u. Monad m => ParsecT s u m u
getState
  let index = GroupIndex -> GroupIndex
forall a. Enum a => a -> a
succ GroupIndex
gi
  setState (index,ci)
  return (Just index)

p_group :: P Pattern
p_group :: P Pattern
p_group = ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'(') ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> do
  index <- P (Maybe GroupIndex)
group_index
  liftM (PGroup index) $ between (char '(') (char ')') p_regex

-- p_post_atom takes the previous atom as a parameter
p_post_atom :: Pattern -> P Pattern
p_post_atom :: Pattern -> P Pattern
p_post_atom Pattern
atom = (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'?' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> Pattern
PQuest Pattern
atom))
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'+' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> Pattern
PPlus Pattern
atom))
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'*' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Pattern -> Pattern
PStar Bool
True Pattern
atom))
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Pattern -> P Pattern
p_bound Pattern
atom
               P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Pattern
atom

p_bound :: Pattern -> P Pattern
p_bound :: Pattern -> P Pattern
p_bound Pattern
atom = P Pattern -> P Pattern
forall tok st a. GenParser tok st a -> GenParser tok st a
try (P Pattern -> P Pattern) -> P Pattern -> P Pattern
forall a b. (a -> b) -> a -> b
$ ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern
-> P Pattern
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{') (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}') (Pattern -> P Pattern
p_bound_spec Pattern
atom)

p_bound_spec :: Pattern -> P Pattern
p_bound_spec :: Pattern -> P Pattern
p_bound_spec Pattern
atom = do lowS <- ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
                       let lowI = [Char] -> GroupIndex
forall a. Read a => [Char] -> a
read [Char]
lowS
                       highMI <- option (Just lowI) $ try $ do
                                   _ <- char ','
  -- parsec note: if 'many digits' fails below then the 'try' ensures
  -- that the ',' will not match the closing '}' in p_bound, same goes
  -- for any non '}' garbage after the 'many digits'.
                                   highS <- many digit
                                   if null highS then return Nothing -- no upper bound
                                     else do let highI = [Char] -> GroupIndex
forall a. Read a => [Char] -> a
read [Char]
highS
                                             guard (lowI <= highI)
                                             return (Just (read highS))
                       return (PBound lowI highMI atom)

-- An anchor cannot be modified by a repetition specifier
p_anchor :: P Pattern
p_anchor :: P Pattern
p_anchor = (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'^' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (DoPa -> Pattern)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM DoPa -> Pattern
PCarat ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
char_index)
       P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'$' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (DoPa -> Pattern)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> P Pattern
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM DoPa -> Pattern
PDollar ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
char_index)
       P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern -> P Pattern
forall tok st a. GenParser tok st a -> GenParser tok st a
try (do _ <- [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
"()"
                   index <- group_index
                   return $ PGroup index PEmpty)
       P Pattern -> [Char] -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Char] -> ParsecT s u m a
<?> [Char]
"empty () or anchor ^ or $"

char_index :: P DoPa
char_index :: ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
char_index = do (gi,ci) <- ParsecT
  [Char] (GroupIndex, GroupIndex) Identity (GroupIndex, GroupIndex)
forall (m :: * -> *) s u. Monad m => ParsecT s u m u
getState
                let ci' = GroupIndex -> GroupIndex
forall a. Enum a => a -> a
succ GroupIndex
ci
                setState (gi,ci')
                return (DoPa ci')

p_char :: P Pattern
p_char :: P Pattern
p_char = P Pattern
p_dot P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_left_brace P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_escaped P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> P Pattern
p_other_char where
  p_dot :: P Pattern
p_dot = Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'.' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> (a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity b)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DoPa -> Pattern
PDot
  p_left_brace :: P Pattern
p_left_brace = P Pattern -> P Pattern
forall tok st a. GenParser tok st a -> GenParser tok st a
try (P Pattern -> P Pattern) -> P Pattern -> P Pattern
forall a b. (a -> b) -> a -> b
$ (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity ()
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity ()
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity ()
forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit ParsecT [Char] (GroupIndex, GroupIndex) Identity ()
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> (a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity b)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DoPa -> Char -> Pattern
`PChar` Char
'{'))
  p_escaped :: P Pattern
p_escaped = Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\\' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> (Char -> P Pattern) -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> (a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity b)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Char
c -> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> (a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity b)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DoPa -> Char -> Pattern
`PEscape` Char
c)
  p_other_char :: P Pattern
p_other_char = [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m Char
noneOf [Char]
specials ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> (Char -> P Pattern) -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> (a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity b)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Char
c -> ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
char_index ParsecT [Char] (GroupIndex, GroupIndex) Identity DoPa
-> (DoPa -> P Pattern) -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> (a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity b)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Pattern -> P Pattern
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pattern -> P Pattern) -> (DoPa -> Pattern) -> DoPa -> P Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (DoPa -> Char -> Pattern
`PChar` Char
c)
    where specials :: [Char]
specials  = [Char]
"^.[$()|*+?{\\"

-- parse [bar] and [^bar] sets of characters
p_bracket :: P Pattern
p_bracket :: P Pattern
p_bracket = (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'[') ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ( (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'^' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> P Pattern -> P Pattern
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Bool -> P Pattern
p_set Bool
True) P Pattern -> P Pattern -> P Pattern
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (Bool -> P Pattern
p_set Bool
False) )

p_set :: Bool -> P Pattern
p_set :: Bool -> P Pattern
p_set Bool
invert = do initial <- [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option [Char]
"" (Char -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
']' ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall a b.
ParsecT [Char] (GroupIndex, GroupIndex) Identity a
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return [Char]
"]")
                  values <- if null initial then many1 p_set_elem else many p_set_elem
                  _ <- char ']'
                  ci <- char_index
                  let chars = [Char] -> Maybe (Set Char)
forall {a}. Ord a => [a] -> Maybe (Set a)
maybe'set ([Char] -> Maybe (Set Char)) -> [Char] -> Maybe (Set Char)
forall a b. (a -> b) -> a -> b
$ [[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Char]] -> [Char]) -> [[Char]] -> [Char]
forall a b. (a -> b) -> a -> b
$
                        [Char]
initial [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
:
                        [ Char
c | BEChar Char
c <- [BracketElement]
values ] [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
:
                        [ [Char
start..Char
end] | BERange Char
start Char
end <- [BracketElement]
values ]
                      colls = [PatternSetCollatingElement]
-> Maybe (Set PatternSetCollatingElement)
forall {a}. Ord a => [a] -> Maybe (Set a)
maybe'set [[Char] -> PatternSetCollatingElement
PatternSetCollatingElement [Char]
coll | BEColl [Char]
coll <- [BracketElement]
values ]
                      equivs = [PatternSetEquivalenceClass]
-> Maybe (Set PatternSetEquivalenceClass)
forall {a}. Ord a => [a] -> Maybe (Set a)
maybe'set [[Char] -> PatternSetEquivalenceClass
PatternSetEquivalenceClass [Char]
equiv | BEEquiv [Char]
equiv <- [BracketElement]
values]
                      class's = [PatternSetCharacterClass] -> Maybe (Set PatternSetCharacterClass)
forall {a}. Ord a => [a] -> Maybe (Set a)
maybe'set [[Char] -> PatternSetCharacterClass
PatternSetCharacterClass [Char]
a'class | BEClass [Char]
a'class <- [BracketElement]
values]
                      maybe'set [a]
x = if [a] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [a]
x then Maybe (Set a)
forall a. Maybe a
Nothing else Set a -> Maybe (Set a)
forall a. a -> Maybe a
Just ([a] -> Set a
forall a. Ord a => [a] -> Set a
Set.fromList [a]
x)
                      sets = Maybe (Set Char)
-> Maybe (Set PatternSetCharacterClass)
-> Maybe (Set PatternSetCollatingElement)
-> Maybe (Set PatternSetEquivalenceClass)
-> PatternSet
PatternSet Maybe (Set Char)
chars Maybe (Set PatternSetCharacterClass)
class's Maybe (Set PatternSetCollatingElement)
colls Maybe (Set PatternSetEquivalenceClass)
equivs
                  sets `seq` return $ if invert then PAnyNot ci sets else PAny ci sets

-- From here down the code is the parser and functions for pattern [ ] set things

p_set_elem :: P BracketElement
p_set_elem :: ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem = BracketElement
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
checkBracketElement (BracketElement
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< [ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall (t :: * -> *) (f :: * -> *) a.
(Foldable t, Alternative f) =>
t (f a) -> f a
asum
  [ ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_class
  , ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_equiv
  , ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_coll
  , ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_range
  , ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_char
  , [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a.
[Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"Failed to parse bracketed string"
  ]

p_set_elem_class :: P BracketElement
p_set_elem_class :: ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_class = ([Char] -> BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Char] -> BracketElement
BEClass (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$
  ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between ([Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
"[:") ([Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
":]") (ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 (ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char])
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m Char
noneOf [Char]
":]"))

p_set_elem_equiv :: P BracketElement
p_set_elem_equiv :: ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_equiv = ([Char] -> BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Char] -> BracketElement
BEEquiv (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$
  ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between ([Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
"[=") ([Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
"=]") (ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 (ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char])
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m Char
noneOf [Char]
"=]"))

p_set_elem_coll :: P BracketElement
p_set_elem_coll :: ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_coll =  ([Char] -> BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM [Char] -> BracketElement
BEColl (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$
  ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between ([Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
"[.") ([Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m [Char]
string [Char]
".]") (ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 (ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char])
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m Char
noneOf [Char]
".]"))

p_set_elem_range :: P BracketElement
p_set_elem_range :: ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_range = ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall tok st a. GenParser tok st a -> GenParser tok st a
try (ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement)
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$ do
  start <- [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m Char
noneOf [Char]
"]"
  _  <- char '-'
  end <- noneOf "]"
  return $ BERange start end

p_set_elem_char :: P BracketElement
p_set_elem_char :: ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
p_set_elem_char = do
  c <- [Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
[Char] -> ParsecT s u m Char
noneOf [Char]
"]"
  return (BEChar c)

-- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@.
-- This failure should not be caught.
--
checkBracketElement :: BracketElement -> P BracketElement
checkBracketElement :: BracketElement
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
checkBracketElement BracketElement
e =
  case BracketElement
e of
    BERange Char
start Char
end
      | Char
start Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
end -> [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a.
[Char] -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char]
 -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement)
-> [Char]
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a b. (a -> b) -> a -> b
$ [[Char]] -> [Char]
unwords
          [ [Char]
"End point"
          , Char -> [Char]
forall a. Show a => a -> [Char]
show Char
end
          , [Char]
"of dashed character range is less than starting point"
          , Char -> [Char]
forall a. Show a => a -> [Char]
show Char
start
          ]
      | Bool
otherwise -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEChar  Char
_ -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEClass [Char]
_ -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEColl  [Char]
_ -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
ok
    BEEquiv [Char]
_ -> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
ok
  where
    ok :: ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
ok = BracketElement
-> ParsecT [Char] (GroupIndex, GroupIndex) Identity BracketElement
forall a. a -> ParsecT [Char] (GroupIndex, GroupIndex) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return BracketElement
e