{-# LINE 1 "Text/Regex/PCRE/Light/Base.hsc" #-}
{-# LANGUAGE CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving #-}
--------------------------------------------------------------------
-- |
-- Module   : Text.Regex.PCRE.Light.Base
-- Copyright: Copyright (c) 2007-2008, Don Stewart
--
-- Documentation based on /man pcreapi/, written by Philip Hazel, 2007.
--
-- License   : BSD3
-- Maintainer:  Don Stewart <dons@galois.com>
-- Stability :  experimental
-- Portability: CPP, FFI
-- Tested with: GHC 6.8.2
--
-- Raw FFI bindings to PCRE functions and constants. 
--

module Text.Regex.PCRE.Light.Base (

        -- * A PCRE structure
          PCRE
        , Regex(..)

        -- * C exports
        , c_pcre_compile
        , c_pcre_exec
        , c_pcre_fullinfo

        ------------------------------------------------------------------------

        -- * PCRE Options, an abstract newtyped Num wrapper over a CInt
        , PCREOption
        , combineOptions

        , anchored , auto_callout {-, bsr_anycrlf-}
        {-, bsr_unicode-} , caseless , dollar_endonly
        , dotall , dupnames , extended
        , extra , firstline , multiline
        {-, newline_any-} {-, newline_anycrlf-} , newline_cr
        , newline_crlf , newline_lf , no_auto_capture
        , ungreedy , utf8 , no_utf8_check

        -- * PCRE exec-time options, an abstract, newtyped Num wrapper over CInt
        , PCREExecOption
        , combineExecOptions

        , exec_anchored {-, exec_newline_any , exec_newline_anycrlf-}
        , exec_newline_cr , exec_newline_crlf , exec_newline_lf
        , exec_notbol , exec_noteol , exec_notempty
        , exec_no_utf8_check , exec_partial

        ------------------------------------------------------------------------ 
        -- * PCRE Errors
        , PCREError

        , error_nomatch , error_null , error_badoption
        , error_badmagic {-, error_unknown_opcode-} , error_unknown_node
        , error_nomemory , error_nosubstring , error_matchlimit
        , error_callout , error_badutf8 , error_badutf8_offset
        , error_partial , error_badpartial , error_internal
        , error_badcount , error_dfa_uitem , error_dfa_ucond
        , error_dfa_umlimit , error_dfa_wssize , error_dfa_recurse
        , error_recursionlimit {-, error_nullwslimit-} {-, error_badnewline-}

        -- * PCRE Info
        , PCREInfo

        , info_options , info_size , info_capturecount
        , info_backrefmax , info_firstbyte , info_firstchar
        , info_firsttable , info_lastliteral , info_nameentrysize
        , info_namecount , info_nametable , info_studysize
        , info_default_tables {-, info_okpartial-} {-, info_jchanged-}
        {-, info_hascrorlf-}

        -- * PCRE Configuration
        , PCREConfig

        , config_utf8 , config_newline , config_link_size
        , config_posix_malloc_threshold , config_match_limit
        , config_stackrecurse , config_unicode_properties
        , config_match_limit_recursion {-, config_bsr-}

        -- * PCRE Extra
        , PCREExtraFlags

        , extra_study_data , extra_match_limit , extra_callout_data
        , extra_tables , extra_match_limit_recursion,

        ------------------------------------------------------------------------

        size_of_cint

    ) where

-- Foreigns
import Foreign
import Foreign.Ptr
import Foreign.C.Types
import Foreign.C.String

import qualified Data.ByteString.Char8 as S



-- | Get sizeof CInt from hsc2hs
size_of_cint :: Int
size_of_cint :: Int
size_of_cint = Int
4
{-# LINE 108 "Text/Regex/PCRE/Light/Base.hsc" #-}

------------------------------------------------------------------------
-- Types

-- | An abstract pointer to a compiled PCRE Regex structure
-- The structure allocated by the PCRE library will be deallocated
-- automatically by the Haskell storage manager.
--
data Regex = Regex {-# UNPACK #-} !(ForeignPtr PCRE)
                   {-# UNPACK #-} !S.ByteString
        deriving (Regex -> Regex -> Bool
(Regex -> Regex -> Bool) -> (Regex -> Regex -> Bool) -> Eq Regex
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Regex -> Regex -> Bool
== :: Regex -> Regex -> Bool
$c/= :: Regex -> Regex -> Bool
/= :: Regex -> Regex -> Bool
Eq, Eq Regex
Eq Regex =>
(Regex -> Regex -> Ordering)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Bool)
-> (Regex -> Regex -> Regex)
-> (Regex -> Regex -> Regex)
-> Ord Regex
Regex -> Regex -> Bool
Regex -> Regex -> Ordering
Regex -> Regex -> Regex
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Regex -> Regex -> Ordering
compare :: Regex -> Regex -> Ordering
$c< :: Regex -> Regex -> Bool
< :: Regex -> Regex -> Bool
$c<= :: Regex -> Regex -> Bool
<= :: Regex -> Regex -> Bool
$c> :: Regex -> Regex -> Bool
> :: Regex -> Regex -> Bool
$c>= :: Regex -> Regex -> Bool
>= :: Regex -> Regex -> Bool
$cmax :: Regex -> Regex -> Regex
max :: Regex -> Regex -> Regex
$cmin :: Regex -> Regex -> Regex
min :: Regex -> Regex -> Regex
Ord, Int -> Regex -> ShowS
[Regex] -> ShowS
Regex -> String
(Int -> Regex -> ShowS)
-> (Regex -> String) -> ([Regex] -> ShowS) -> Show Regex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Regex -> ShowS
showsPrec :: Int -> Regex -> ShowS
$cshow :: Regex -> String
show :: Regex -> String
$cshowList :: [Regex] -> ShowS
showList :: [Regex] -> ShowS
Show)

type PCRE = ()

------------------------------------------------------------------------

-- | A type for PCRE compile-time options. These are newtyped CInts,
-- which can be bitwise-or'd together, using '(Data.Bits..|.)'
--
newtype PCREOption = PCREOption { PCREOption -> PCREExtraFlags
unPCREOption :: PCREOption_ }

{-# LINE 129 "Text/Regex/PCRE/Light/Base.hsc" #-}
    deriving (Eq,Ord,Show,Read)

{-# LINE 131 "Text/Regex/PCRE/Light/Base.hsc" #-}

-- | Combine a list of options into a single option, using bitwise (.|.)
combineOptions :: [PCREOption] -> PCREOption
combineOptions :: [PCREOption] -> PCREOption
combineOptions = PCREExtraFlags -> PCREOption
PCREOption (PCREExtraFlags -> PCREOption)
-> ([PCREOption] -> PCREExtraFlags) -> [PCREOption] -> PCREOption
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (PCREOption -> PCREExtraFlags -> PCREExtraFlags)
-> PCREExtraFlags -> [PCREOption] -> PCREExtraFlags
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (PCREExtraFlags -> PCREExtraFlags -> PCREExtraFlags
forall a. Bits a => a -> a -> a
(.|.) (PCREExtraFlags -> PCREExtraFlags -> PCREExtraFlags)
-> (PCREOption -> PCREExtraFlags)
-> PCREOption
-> PCREExtraFlags
-> PCREExtraFlags
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PCREOption -> PCREExtraFlags
unPCREOption) PCREExtraFlags
0

-- Now follows the user-visible options to _exec and _compile.
-- To avoid type errors, we newtype the underlying CInts, and 
-- statically differentiate PCREOptions from non-PCREOptions
--
-- The safety can still be defeated using numeric literals though,
-- and other Num operations. We could do more to protect against this.
-- (a smart constructor for .|.)

-- | 'anchored'
--
-- If this bit is set, the pattern is forced to be /anchored/, that is,
-- it is constrained to match only at the first matching point in the
-- string that is being searched (the /subject string/). This effect can
-- also be achieved by appropriate constructs in the pattern itself, which
-- is the only way to do it in Perl.  
--
anchored           :: PCREOption
anchored :: PCREOption
anchored           = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
anchored_cint

-- | 'auto_callout'
--
-- If this bit is set, "compile" automatically inserts callout
-- items, all with number 255, before each pattern item. For discussion
-- of the callout facility, see the man pcrecallout documentation
--
auto_callout       :: PCREOption
auto_callout :: PCREOption
auto_callout       = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
auto_callout_cint

-- | 'bsr_anycrlf' and 'bsr_unicode'
--
-- These options (which are mutually exclusive) control what the \\R escape
-- sequence matches. The choice is either to match only CR, LF, or CRLF, or to
-- match any Unicode new- line sequence. The default is specified when PCRE is
-- built. It can be overridden from within the pattern, or by setting an option
-- when a compiled pattern is matched.
--
-- bsr_anycrlf        :: PCREOption
-- bsr_anycrlf        = PCREOption bsr_anycrlf_cint

-- | 'bsr_unicode'. See 'bse_anycrlf'
--
-- bsr_unicode        :: PCREOption
-- bsr_unicode        = PCREOption bsr_unicode_cint

-- | 'caseless'
--
-- If this bit is set, letters in the pattern match both upper and lower case
-- letters. It is equivalent to Perl's \/i option, and it can be changed within a
-- pattern by a (?i) option setting. In UTF-8 mode, PCRE always understands the
-- concept of case for characters whose values are less than 128, so caseless
-- matching is always possible. For characters with higher values, the concept of
-- case is supported if PCRE is compiled with Unicode property sup- port, but not
-- otherwise. If you want to use caseless matching for characters 128 and above,
-- you must ensure that PCRE is compiled with Unicode property support as well as
-- with UTF-8 support.
-- 
caseless           :: PCREOption
caseless :: PCREOption
caseless           = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
caseless_cint

-- | 'dollar_endonly'
--
-- If this bit is set, a dollar metacharacter in the pattern matches only at
-- the end of the subject string. Without this option, a dollar also matches
-- immediately before a newline at the end of the string (but not before any other
-- newlines). The 'dollar_endonly' option is ignored if 'multiline'
-- is set.  There is no equivalent to this option in Perl, and no way to set it
-- within a pattern.
--
dollar_endonly     :: PCREOption
dollar_endonly :: PCREOption
dollar_endonly     = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
dollar_endonly_cint

-- | 'dotall'
--
-- If this bit is set, a dot metacharater in the pattern matches all
-- characters, including those that indicate newline. Without it, a dot does
-- not match when the current position is at a newline. This option is
-- equivalent to Perl's \/s option, and it can be changed within a pattern by a
-- (?s) option setting. A negative class such as [^a] always matches newline
-- characters, independent of the setting of this option.
--
dotall             :: PCREOption
dotall :: PCREOption
dotall             = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
dotall_cint

-- | 'dupnames'
--
-- If this bit is set, names used to identify capturing subpatterns need not be
-- unique. This can be helpful for certain types of pattern when it is known
-- that only one instance of the named subpattern can ever be matched. There are
-- more details of named subpatterns in the /man pcreapi/ documentation.
--
dupnames           :: PCREOption
dupnames :: PCREOption
dupnames           = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
dupnames_cint

-- | 'extended'
--
-- If this bit is set, whitespace data characters in the pattern are totally
-- ignored except when escaped or inside a character class. Whitespace does not
-- include the VT character (code 11). In addition, characters between an
-- unescaped \# outside a character class and the next newline, inclusive, are
-- also ignored. This is equivalent to Perl's \/x option, and it can be changed
--within a pattern by a (?x) option setting.
--
-- This option makes it possible to include comments inside complicated
-- patterns. Note, however, that this applies only to data characters. Whitespace
-- characters may never appear within special character sequences in a pattern,
-- for example within the sequence (?( which introduces a conditional subpattern.
--
extended           :: PCREOption
extended :: PCREOption
extended           = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
extended_cint

-- | 'extra'
--
-- This option was invented in order to turn on additional functionality of
-- PCRE that is incompatible with Perl, but it is currently of very little use.
-- When set, any backslash in a pattern that is followed by a letter that has no
-- special meaning causes an error, thus reserving these combinations for future
-- expansion. By default, as in Perl, a backslash followed by a letter with no
-- special meaning is treated as a literal. (Perl can, however, be persuaded to
-- give a warning for this.) There are at present no other features controlled by
-- this option. It can also be set by a (?X) option setting within a pattern. 
--
extra              :: PCREOption
extra :: PCREOption
extra              = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
extra_cint

-- | 'firstline'
--
-- If this option is set, an unanchored pattern is required to match before or
-- at the first newline in the subject string, though the matched text may
--continue over the newline.
--
firstline          :: PCREOption
firstline :: PCREOption
firstline          = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
firstline_cint

-- | 'multiline'
--
--  By default, PCRE treats the subject string as consisting of a single line
-- of characters (even if it actually contains newlines). The /start of line/
-- metacharacter (^) matches only at the start of the string, while the /end of line/
--  metacharacter ($) matches only at the end of the string, or before a
-- terminating newline (unless 'dollar_endonly' is set). This is the same
-- as Perl.
--
-- When 'multiline' it is set, the /start of line/ and /end of line/
-- constructs match immediately following or immediately before internal newlines
-- in the subject string, respectively, as well as at the very start and end. This
-- is equivalent to Perl's \/m option, and it can be changed within a pattern by a
-- (?m) option setting. If there are no newlines in a subject string, or no occur-
-- rences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
-- 
multiline          :: PCREOption
multiline :: PCREOption
multiline          = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
multiline_cint

-- | newline_cr', 'newline_lf', 'newline_crlf',
-- 'newline_anycrlf', 'newline_any'
--
-- These options override the default newline definition that
-- was chosen when PCRE was built. Setting the first or the
-- second specifies that a newline is indicated by a single
-- character (CR or LF, respectively). Setting 'newline_crlf' specifies
-- that a newline is indicated by the two-character CRLF sequence.
-- Setting 'newline_anycrlf'
-- specifies that any of the three preceding sequences should
-- be recognized. Setting 'newline_any' specifies that any
-- Unicode newline sequence should be recognized. The Unicode
-- newline sequences are the three just mentioned, plus the
-- single characters VT (vertical tab, U+000B), FF (formfeed,
-- U+000C), NEL (next line, U+0085), LS (line separator,
-- U+2028), and PS (paragraph separator, U+2029). The last
-- two are recognized only in UTF-8 mode.
-- 
-- The newline setting in the options word uses three bits
-- that are treated as a number, giving eight possibilities.
-- Currently only six are used (default plus the five values
-- above). This means that if you set more than one newline
-- option, the combination may or may not be sensible. For
-- example, 'newline_cr' with 'newline_lf' is equivalent to
-- 'newline_crlf', but other combinations may yield unused numbers and
-- cause an error.
-- 
-- The only time that a line break is specially recognized
-- when compiling a pattern is if 'extended' is set, and
-- an unescaped \# outside a character class is encountered.
-- This indicates a comment that lasts until after the next
-- line break sequence. In other circumstances, line break
-- sequences are treated as literal data, except that in
-- 'extended' mode, both CR and LF are treated as whitespace characters
-- and are therefore ignored.  -- 
--
-- The newline option that is set at compile time becomes the
-- default that is used for 'exec' but it can be overridden.
-- 
-- newline_any        :: PCREOption
-- newline_any        = PCREOption newline_any_cint

-- | 'newline_anycrlf', see 'newline_any'
-- newline_anycrlf    :: PCREOption
-- newline_anycrlf    = PCREOption newline_anycrlf_cint

-- | 'newline_cr', see 'newline_any'
newline_cr         :: PCREOption
newline_cr :: PCREOption
newline_cr         = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
newline_cr_cint

-- | 'newline_crlf', see 'newline_any'
newline_crlf       :: PCREOption
newline_crlf :: PCREOption
newline_crlf       = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
newline_crlf_cint

-- | 'newline_lf', see 'newline_any'
newline_lf         :: PCREOption
newline_lf :: PCREOption
newline_lf         = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
newline_lf_cint

-- | 'no_auto_capture'
--
-- If this option is set, it disables the use of numbered
-- capturing parentheses in the pattern. Any opening paren-
-- thesis that is not followed by ? behaves as if it were
-- followed by ?: but named parentheses can still be used for
-- capturing (and they acquire numbers in the usual way).
-- There is no equivalent of this option in Perl.
--
no_auto_capture    :: PCREOption
no_auto_capture :: PCREOption
no_auto_capture    = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
no_auto_capture_cint

-- | 'ungreedy'
--
-- This option inverts the /greediness/ of the quantifiers so
-- that they are not greedy by default, but become greedy if
-- followed by /?/. It is not compatible with Perl. It can
-- also be set by a (?U) option setting within the pattern.
--
ungreedy           :: PCREOption
ungreedy :: PCREOption
ungreedy           = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
ungreedy_cint

-- | 'utf8'
--
-- This option causes PCRE to regard both the pattern and the
-- subject as strings of UTF-8 characters instead of single-byte character
-- strings. However, it is available only when 
-- PCRE is built to include UTF-8 support. If not, the use of
-- this option provokes an error. Details of how this option
-- changes the behaviour of PCRE are given in the section on
-- UTF-8 support in the main pcre page.
--
utf8               :: PCREOption
utf8 :: PCREOption
utf8               = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
utf8_cint

-- | 'no_utf8_check'
--
-- When PCRE_UTF8 is set, the validity of the pattern as a
-- UTF-8 string is automatically checked. There is a discussion 
-- about the validity of UTF-8 strings in the main pcre
-- page. If an invalid UTF-8 sequence of bytes is found,
-- compile() returns an error. If you already know that
-- your pattern is valid, and you want to skip this check for
-- performance reasons, you can set the 'no_utf8_check'
-- option. When it is set, the effect of passing an invalid
-- UTF-8 string as a pattern is undefined. It may cause your
-- program to crash. Note that this option can also be passed
-- to 'exec', to suppress the UTF-8 validity checking of subject strings.
--
no_utf8_check      :: PCREOption
no_utf8_check :: PCREOption
no_utf8_check      = PCREExtraFlags -> PCREOption
PCREOption PCREExtraFlags
no_utf8_check_cint

-- Internal name for hsc2hs to bind to.
type PCREOption_ = CInt

-- PCRE compile options, as CInts
anchored_cint         :: PCREOption_
anchored_cint :: PCREExtraFlags
anchored_cint         =  PCREExtraFlags
16
auto_callout_cint     :: PCREOption_
auto_callout_cint :: PCREExtraFlags
auto_callout_cint     =  PCREExtraFlags
16384
caseless_cint         :: PCREOption_
caseless_cint :: PCREExtraFlags
caseless_cint         =  PCREExtraFlags
1
dollar_endonly_cint   :: PCREOption_
dollar_endonly_cint :: PCREExtraFlags
dollar_endonly_cint   =  PCREExtraFlags
32
dotall_cint           :: PCREOption_
dotall_cint :: PCREExtraFlags
dotall_cint           =  PCREExtraFlags
4
dupnames_cint         :: PCREOption_
dupnames_cint :: PCREExtraFlags
dupnames_cint         =  PCREExtraFlags
524288
extended_cint         :: PCREOption_
extended_cint :: PCREExtraFlags
extended_cint         =  PCREExtraFlags
8
extra_cint            :: PCREOption_
extra_cint :: PCREExtraFlags
extra_cint            =  PCREExtraFlags
64
firstline_cint        :: PCREOption_
firstline_cint :: PCREExtraFlags
firstline_cint        =  PCREExtraFlags
262144
multiline_cint        :: PCREOption_
multiline_cint :: PCREExtraFlags
multiline_cint        =  PCREExtraFlags
2
newline_cr_cint       :: PCREOption_
newline_cr_cint :: PCREExtraFlags
newline_cr_cint       =  PCREExtraFlags
1048576
newline_crlf_cint     :: PCREOption_
newline_crlf_cint :: PCREExtraFlags
newline_crlf_cint     =  PCREExtraFlags
3145728
newline_lf_cint       :: PCREOption_
newline_lf_cint :: PCREExtraFlags
newline_lf_cint       =  PCREExtraFlags
2097152
no_auto_capture_cint  :: PCREOption_
no_auto_capture_cint :: PCREExtraFlags
no_auto_capture_cint  =  PCREExtraFlags
4096
ungreedy_cint         :: PCREOption_
ungreedy_cint :: PCREExtraFlags
ungreedy_cint         =  PCREExtraFlags
512
utf8_cint             :: PCREOption_
utf8_cint :: PCREExtraFlags
utf8_cint             =  PCREExtraFlags
2048
no_utf8_check_cint    :: PCREOption_
no_utf8_check_cint :: PCREExtraFlags
no_utf8_check_cint    =  PCREExtraFlags
8192

{-# LINE 421 "Text/Regex/PCRE/Light/Base.hsc" #-}

--  , bsr_anycrlf_cint     = PCRE_BSR_ANYCRLF
--  , bsr_unicode_cint     = PCRE_BSR_UNICODE
--  , newline_any_cint     = PCRE_NEWLINE_ANY
--  , newline_anycrlf_cint = PCRE_NEWLINE_ANYCRLF

------------------------------------------------------------------------

-- | PCRE exec options, to be passed to exec
newtype PCREExecOption = PCREExecOption { unPCREExecOption :: PCREExecOption_ }

{-# LINE 432 "Text/Regex/PCRE/Light/Base.hsc" #-}
    deriving (Eq,Ord,Show,Read)

{-# LINE 434 "Text/Regex/PCRE/Light/Base.hsc" #-}

-- | Combine a list of exec options into a single option, using bitwise (.|.)
combineExecOptions :: [PCREExecOption] -> PCREExecOption
combineExecOptions = PCREExecOption . foldr ((.|.) . unPCREExecOption) 0

-- | 'anchored'.
--
-- The 'anchored' option limits 'exec' to matching at
-- the first matching position. If a pattern was compiled
-- with 'anchored', or turned out to be anchored by virtue
-- of its contents, it cannot be made unachored at matching
-- time.
exec_anchored              :: PCREExecOption
exec_anchored :: PCREExecOption
exec_anchored              = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_anchored_cint

-- | 'newline_cr', 'newline_lf',
-- 'newline_crlf', 'newline_anycrlf', 'newline_any'
--
-- These options override the newline definition that was
-- chosen or defaulted when the pattern was compiled. For
-- details, see the description of 'compile' above. Dur-
-- ing matching, the newline choice affects the behaviour of
-- the dot, circumflex, and dollar metacharacters. It may
-- also alter the way the match position is advanced after a
-- match failure for an unanchored pattern.
--
-- When 'newline_crlf', 'newline_anycrlf', or 'newline_any'
-- is set, and a match attempt for an unanchored
-- pattern fails when the current position is at a CRLF
-- sequence, and the pattern contains no explicit matches for
-- CR or LF characters, the match position is advanced by two
-- characters instead of one, in other words, to after the
-- CRLF.
--
-- The above rule is a compromise that makes the most common
-- cases work as expected. For example, if the pattern is .+A
-- (and the 'dotall' option is not set), it does not match
-- the string /\\r\\nA/ because, after failing at the start, it
-- skips both the CR and the LF before retrying. However, the
-- pattern /[\\r\\n]A/ does match that string, because it contains
-- an explicit CR or LF reference, and so advances only
-- by one character after the first failure.
--
-- An explicit match for CR of LF is either a literal appear-
-- ance of one of those characters, or one of the \\r or \\n
-- escape sequences. Implicit matches such as [^X] do not
-- count, nor does \\s (which includes CR and LF in the char-
-- acters that it matches).
--
-- Notwithstanding the above, anomalous effects may still
-- occur when CRLF is a valid newline sequence and explicit
-- \\r or \\n escapes appear in the pattern.
--
-- exec_newline_any           :: PCREExecOption
-- exec_newline_any           = PCREExecOption exec_newline_any_cint

-- | 'exec_newline_anycrlf', see 'exec_newline_any'
-- exec_newline_anycrlf       :: PCREExecOption
-- exec_newline_anycrlf       = PCREExecOption exec_newline_anycrlf_cint

-- | 'exec_newline_cr', see 'exec_newline_any'
exec_newline_cr            :: PCREExecOption
exec_newline_cr :: PCREExecOption
exec_newline_cr            = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_newline_cr_cint

-- | 'exec_newline_crlf', see 'exec_newline_any'
exec_newline_crlf          :: PCREExecOption
exec_newline_crlf :: PCREExecOption
exec_newline_crlf          = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_newline_crlf_cint

-- | 'exec_newline_lf', see 'exec_newline_any'
exec_newline_lf            :: PCREExecOption
exec_newline_lf :: PCREExecOption
exec_newline_lf            = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_newline_lf_cint

-- | 'PCRE_NOTBOL'
--
-- This option specifies that first character of the subject
-- string is not the beginning of a line, so the circumflex
-- metacharacter should not match before it. Setting this
-- without 'multiline' (at compile time) causes circumflex
-- never to match. This option affects only the behaviour of
-- the circumflex metacharacter. It does not affect \\A.
--
exec_notbol                :: PCREExecOption
exec_notbol :: PCREExecOption
exec_notbol                = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_notbol_cint

-- | 'noteol'
--
-- This option specifies that the end of the subject string
-- is not the end of a line, so the dollar metacharacter
-- should not match it nor (except in multiline mode) a newline
-- immediately before it. Setting this without 'multiline' 
-- (at compile time) causes dollar never to match.
-- This option affects only the behaviour of the dollar
-- metacharacter. It does not affect \\Z or \\z.
--
exec_noteol                :: PCREExecOption
exec_noteol :: PCREExecOption
exec_noteol                = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_noteol_cint

-- | PCRE_NOTEMPTY
--
-- An empty string is not considered to be a valid match if
-- this option is set. If there are alternatives in the pattern,
-- they are tried. If all the alternatives match the
-- empty string, the entire match fails. For example, if the
-- pattern
--
-- > a?b?
--
-- is applied to a string not beginning with /a/ or /b/, it
-- matches the empty string at the start of the subject. With
-- 'notempty' set, this match is not valid, so 'PCRE
-- searches further into the string for occurrences of /a/ or
-- /b/.
--
-- Perl has no direct equivalent of 'notempty', but it
-- does make a special case of a pattern match of the empty
-- string within its split() function, and when using the \/g
-- modifier. It is possible to emulate Perl's behaviour after
-- matching a null string by first trying the match again at
-- the same offset with PCRE_NOTEMPTY and PCRE_ANCHORED, and
-- then if that fails by advancing the starting offset (see
-- below) and trying an ordinary match again. There is some
-- code that demonstrates how to do this in the pcredemo.c
-- sample program.
--
exec_notempty              :: PCREExecOption
exec_notempty :: PCREExecOption
exec_notempty              = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_notempty_cint

-- | 'no_utf8_check'
--
-- When 'utf8' is set at compile time, the validity of the
-- subject as a UTF-8 string is automatically checked when
-- exec() is subsequently called. The value of
-- startoffset is also checked to ensure that it points to
-- the start of a UTF-8 character. There is a discussion
-- about the validity of UTF-8 strings in the section on
-- UTF-8 support in the main pcre page. If an invalid UTF-8
-- sequence of bytes is found, exec() returns the error
-- 'error_badutf8'. If startoffset contains an invalid
-- value, 'error_badutf8_offset' is returned.
--
-- If you already know that your subject is valid, and you
-- want to skip these checks for performance reasons, you can
-- set the 'no_utf8_check' option when calling
-- 'exec'. You might want to do this for the second and
-- subsequent calls to exec() if you are making repeated
-- calls to find all the matches in a single subject string.
-- However, you should be sure that the value of startoffset
-- points to the start of a UTF-8 character. When
-- 'no_utf8_check' is set, the effect of passing an
-- invalid UTF-8 string as a subject, or a value of startoff-
-- set that does not point to the start of a UTF-8 character,
-- is undefined. Your program may crash.
--
exec_no_utf8_check         :: PCREExecOption
exec_no_utf8_check :: PCREExecOption
exec_no_utf8_check         = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_no_utf8_check_cint

-- | 'partial'
--
-- This option turns on the partial matching feature. If the
-- subject string fails to match the pattern, but at some
-- point during the matching process the end of the subject
-- was reached (that is, the subject partially matches the
-- pattern and the failure to match occurred only because
-- there were not enough subject characters), 'exec'
-- returns 'error_partial' instead of 'error_nomatch'.
-- When 'partial' is used, there are restrictions on what
-- may appear in the pattern. These are discussed in the
-- pcrepartial documentation.
--
exec_partial               :: PCREExecOption
exec_partial :: PCREExecOption
exec_partial               = PCREExtraFlags -> PCREExecOption
PCREExecOption PCREExtraFlags
exec_partial_cint

-- Internal name for hsc2hs to bind to.
type PCREExecOption_ = CInt

-- PCRE exec options
exec_anchored_cint         :: PCREExecOption_
exec_anchored_cint :: PCREExtraFlags
exec_anchored_cint         =  PCREExtraFlags
16
exec_newline_cr_cint       :: PCREExecOption_
exec_newline_cr_cint :: PCREExtraFlags
exec_newline_cr_cint       =  PCREExtraFlags
1048576
exec_newline_crlf_cint     :: PCREExecOption_
exec_newline_crlf_cint :: PCREExtraFlags
exec_newline_crlf_cint     =  PCREExtraFlags
3145728
exec_newline_lf_cint       :: PCREExecOption_
exec_newline_lf_cint :: PCREExtraFlags
exec_newline_lf_cint       =  PCREExtraFlags
2097152
exec_notbol_cint           :: PCREExecOption_
exec_notbol_cint :: PCREExtraFlags
exec_notbol_cint           =  PCREExtraFlags
128
exec_noteol_cint           :: PCREExecOption_
exec_noteol_cint :: PCREExtraFlags
exec_noteol_cint           =  PCREExtraFlags
256
exec_notempty_cint         :: PCREExecOption_
exec_notempty_cint :: PCREExtraFlags
exec_notempty_cint         =  PCREExtraFlags
1024
exec_no_utf8_check_cint    :: PCREExecOption_
exec_no_utf8_check_cint :: PCREExtraFlags
exec_no_utf8_check_cint    =  PCREExtraFlags
8192
exec_partial_cint          :: PCREExecOption_
exec_partial_cint :: PCREExtraFlags
exec_partial_cint          =  PCREExtraFlags
32768

{-# LINE 621 "Text/Regex/PCRE/Light/Base.hsc" #-}

--  , exec_newline_any_cint     = PCRE_NEWLINE_ANY
--  , exec_newline_anycrlf_cint = PCRE_NEWLINE_ANYCRLF
--  , dfa_shortest   = PCRE_DFA_SHORTEST
--  , dfa_restart    = PCRE_DFA_RESTART

------------------------------------------------------------------------

-- | A type for PCRE Errors: exec-time error codes.
type PCREError = CInt

error_nomatch         :: PCREError
error_nomatch :: PCREExtraFlags
error_nomatch         =  (-PCREExtraFlags
1)
error_null            :: PCREError
error_null :: PCREExtraFlags
error_null            =  (-PCREExtraFlags
2)
error_badoption       :: PCREError
error_badoption :: PCREExtraFlags
error_badoption       =  (-PCREExtraFlags
3)
error_badmagic        :: PCREError
error_badmagic :: PCREExtraFlags
error_badmagic        =  (-PCREExtraFlags
4)
error_unknown_node    :: PCREError
error_unknown_node :: PCREExtraFlags
error_unknown_node    =  (-PCREExtraFlags
5)
error_nomemory        :: PCREError
error_nomemory :: PCREExtraFlags
error_nomemory        =  (-PCREExtraFlags
6)
error_nosubstring     :: PCREError
error_nosubstring :: PCREExtraFlags
error_nosubstring     =  (-PCREExtraFlags
7)
error_matchlimit      :: PCREError
error_matchlimit :: PCREExtraFlags
error_matchlimit      =  (-PCREExtraFlags
8)
error_callout         :: PCREError
error_callout :: PCREExtraFlags
error_callout         =  (-PCREExtraFlags
9)
error_badutf8         :: PCREError
error_badutf8 :: PCREExtraFlags
error_badutf8         =  (-PCREExtraFlags
10)
error_badutf8_offset  :: PCREError
error_badutf8_offset :: PCREExtraFlags
error_badutf8_offset  =  (-PCREExtraFlags
11)
error_partial         :: PCREError
error_partial :: PCREExtraFlags
error_partial         =  (-PCREExtraFlags
12)
error_badpartial      :: PCREError
error_badpartial :: PCREExtraFlags
error_badpartial      =  (-PCREExtraFlags
13)
error_internal        :: PCREError
error_internal :: PCREExtraFlags
error_internal        =  (-PCREExtraFlags
14)
error_badcount        :: PCREError
error_badcount :: PCREExtraFlags
error_badcount        =  (-PCREExtraFlags
15)
error_dfa_uitem       :: PCREError
error_dfa_uitem :: PCREExtraFlags
error_dfa_uitem       =  (-PCREExtraFlags
16)
error_dfa_ucond       :: PCREError
error_dfa_ucond :: PCREExtraFlags
error_dfa_ucond       =  (-PCREExtraFlags
17)
error_dfa_umlimit     :: PCREError
error_dfa_umlimit :: PCREExtraFlags
error_dfa_umlimit     =  PCREExtraFlags
(-PCREExtraFlags
18)
error_dfa_wssize      :: PCREError
error_dfa_wssize :: PCREExtraFlags
error_dfa_wssize      =  PCREExtraFlags
(-PCREExtraFlags
19)
error_dfa_recurse     :: PCREError
error_dfa_recurse :: PCREExtraFlags
info_capturecount :: PCREExtraFlags
error_dfa_recurse     =  (-PCREExtraFlags
20)
error_recursionlimit  :: PCREError
error_recursionlimit :: PCREExtraFlags
error_recursionlimit  =  PCREExtraFlags
(-PCREExtraFlags
21)

{-# LINE 655 "Text/Regex/PCRE/Light/Base.hsc" #-}

--  , error_unknown_opcode = PCRE_ERROR_UNKNOWN_OPCODE
--  , error_nullwslimit    = PCRE_ERROR_NULLWSLIMIT
--  , error_badnewline     = PCRE_ERROR_BADNEWLINE

------------------------------------------------------------------------
-- Request types for fullinfo() */

-- | PCRE Info requests -- provides information about the compiled pattern.
type PCREInfo = CInt

info_options          :: PCREInfo
info_options          =  0
info_size             :: PCREInfo
info_size             =  1
info_capturecount     :: PCREInfo
info_capturecount     =  2
info_backrefmax       :: PCREInfo
info_backrefmax       =  3
info_firstbyte        :: PCREInfo
info_firstbyte        =  4
info_firstchar        :: PCREInfo
info_firstchar :: PCREExtraFlags
info_firstchar        =  PCREExtraFlags
4
info_firsttable       :: PCREInfo
info_firsttable :: PCREExtraFlags
info_firsttable       =  PCREExtraFlags
5
info_lastliteral      :: PCREInfo
info_lastliteral :: PCREExtraFlags
info_lastliteral      =  PCREExtraFlags
6
info_nameentrysize    :: PCREInfo
info_nameentrysize :: PCREExtraFlags
info_nameentrysize    =  PCREExtraFlags
7
info_namecount        :: PCREInfo
info_namecount :: PCREExtraFlags
info_namecount        =  PCREExtraFlags
8
info_nametable        :: PCREInfo
info_nametable :: PCREExtraFlags
info_nametable        =  PCREExtraFlags
9
info_studysize        :: PCREInfo
info_studysize :: PCREExtraFlags
info_studysize        =  PCREExtraFlags
10
info_default_tables   :: PCREInfo
info_default_tables :: PCREExtraFlags
info_default_tables   =  PCREExtraFlags
11

{-# LINE 681 "Text/Regex/PCRE/Light/Base.hsc" #-}

--  , info_okpartial       = PCRE_INFO_OKPARTIAL
--  , info_jchanged        = PCRE_INFO_JCHANGED
--  , info_hascrorlf       = PCRE_INFO_HASCRORLF

------------------------------------------------------------------------

-- | Request types for config()
type PCREConfig = CInt

config_utf8                    :: PCREConfig
config_utf8                    =  0
config_newline                 :: PCREConfig
config_newline :: PCREExtraFlags
config_newline                 =  PCREExtraFlags
1
config_link_size               :: PCREConfig
config_link_size :: PCREExtraFlags
config_link_size               =  PCREExtraFlags
2
config_posix_malloc_threshold  :: PCREConfig
config_posix_malloc_threshold :: PCREExtraFlags
config_posix_malloc_threshold  =  PCREExtraFlags
3
config_match_limit             :: PCREConfig
config_match_limit :: PCREExtraFlags
config_match_limit             =  PCREExtraFlags
4
config_stackrecurse            :: PCREConfig
config_stackrecurse :: PCREExtraFlags
config_stackrecurse            =  PCREExtraFlags
5
config_unicode_properties      :: PCREConfig
config_unicode_properties :: PCREExtraFlags
config_unicode_properties      =  PCREExtraFlags
6
config_match_limit_recursion   :: PCREConfig
config_match_limit_recursion :: PCREExtraFlags
config_match_limit_recursion   =  PCREExtraFlags
7

{-# LINE 701 "Text/Regex/PCRE/Light/Base.hsc" #-}

-- Not portable
--  , config_bsr                    = PCRE_CONFIG_BSR


------------------------------------------------------------------------

-- | PCREExtra.
-- A extra structure contains the following fields:
--
-- * flags        Bits indicating which fields are set
-- * study_data   Opaque data from study()
-- * match_limit  Limit on internal resource use
-- * match_limit_recursion  Limit on internal recursion depth
-- * callout_data Opaque data passed back to callouts
-- * tables       Points to character tables or is NULL
--
type PCREExtra = ()

-- | PCREExtraFlags. bit flags for extra structure.
type PCREExtraFlags = CInt

-- Bit flags for the extra structure. Do not re-arrange or redefine
-- these bits, just add new ones on the end, in order to remain compatible. */
extra_study_data             :: PCREExtraFlags
extra_study_data :: PCREExtraFlags
extra_study_data             =  PCREExtraFlags
1
extra_match_limit            :: PCREExtraFlags
extra_match_limit :: PCREExtraFlags
extra_match_limit            =  PCREExtraFlags
2
extra_callout_data           :: PCREExtraFlags
extra_callout_data :: PCREExtraFlags
extra_callout_data           =  PCREExtraFlags
4
extra_tables                 :: PCREExtraFlags
extra_tables :: PCREExtraFlags
extra_tables                 =  PCREExtraFlags
8
extra_match_limit_recursion  :: PCREExtraFlags
extra_match_limit_recursion :: PCREExtraFlags
extra_match_limit_recursion  =  PCREExtraFlags
16

{-# LINE 732 "Text/Regex/PCRE/Light/Base.hsc" #-}

-- PCRE_EXP_DECL pcre *compile(const char *, int, const char **, int *, const unsigned char *);
-- PCRE_EXP_DECL int  config(int, void *);
-- PCRE_EXP_DECL int  exec(const pcre *, const extra *, PCRE_SPTR, int, int, int, int *, int);

------------------------------------------------------------------------
-- C api

{-
   pcre *pcre_compile(const char *pattern, int options,
               const char **errptr, int *erroffset,
               const unsigned char *tableptr);
-}

-- | Compile a pattern to an internal form. The pattern is a C string
-- terminated by a binary zero. A pointer to a single block of memory that is
-- obtained via pcre_malloc is returned. It is up to the caller to free
-- the memory (via pcre_free) when it is no longer required
--
-- The options argument contains various bit settings that affect the
-- compilation. It should be zero if no options are required.
--
-- If errptr is NULL, pcre_compile() returns NULL immediately.
-- Otherwise, if compilation of a pattern fails, pcre_compile() returns NULL,
-- and sets the variable pointed to by errptr to point to a textual error
-- message.
--
-- The offset from the start of the pattern to the character where the error
-- was discovered is placed in the variable pointed to by erroffset, which must
-- not be NULL.
--
foreign import ccall unsafe "pcre.h pcre_compile"
    c_pcre_compile  :: CString
                    -> PCREOption
                    -> Ptr CString
                    -> Ptr CInt
                    -> Ptr Word8
                    -> IO (Ptr PCRE)

-- Additional fields to c_pcre_compile:
--
--    errptr        Where to put an error message
--    erroffset     Offset in pattern where error was found
--    tableptr      Pointer to character tables, or NULL to to use built in

{-
       int pcre_exec(const pcre *code, const pcre_extra *extra,
            const char *subject, int length, int startoffset,
            int options, int *ovector, int ovecsize);
-}

-- | This function matches a compiled regular expression
-- against a given subject string, using a matching algorithm
-- that is similar to Perl's. It returns offsets to captured
-- substrings.
--
-- Its arguments are, in order:
--
-- * 'code'        Points to the compiled pattern (result of pcre_compile)
--
-- * 'extra'       Points to an associated pcre_extra structure (result of pcre_study), or is NULL
--
-- * 'subject'      Points to the subject string
--
-- * 'length'       Length of the subject string, in bytes
--
-- * 'startoffset'  Offset in bytes in the subject at which to start matching
--
-- * 'options'      Option bits
--
-- * 'ovector'      Points to a vector of ints for result substrings
--
-- * 'ovecsize'     Number of elements in the vector (a  multiple of 3)
--
-- Note, subject not required to be null terminated.
--
foreign import ccall unsafe "pcre.h pcre_exec"
    c_pcre_exec     :: Ptr PCRE
                    -> Ptr PCREExtra
                    -> Ptr Word8
                    -> CInt
                    -> CInt
                    -> PCREExecOption
                    -> Ptr CInt
                    -> CInt
                    -> IO CInt

-- | Return information about a compiled pattern
foreign import ccall unsafe "pcre.h pcre_fullinfo"
    c_pcre_fullinfo :: Ptr PCRE
                    -> Ptr PCREExtra
                    -> PCREInfo
                    -> Ptr a
                    -> IO CInt