{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeApplications #-}

-- For functions that can fail for bytes outside the ASCII range, see
-- 'Data.Bytes.Ascii'. For functions that can inspect bytes outside ASCII, see
-- any of the modules for ASCII-compatible encodings (e.g. 'Data.Bytes.Utf8',
-- 'Data.Bytes.Latin1', and so on).

{- | This module contains functions which operate on supersets of 'Bytes' containing ASCII-encoded text.
That is, none of the functions here inspect bytes with a value greater than 127, and do not fail due to the presence of such bytes.
-}
module Data.Bytes.Text.AsciiExt
  ( -- * Line-Oriented IO
    hFoldLines
  , hForLines_

    -- ** Standard Handles
  , forLines_
  , foldLines

    -- * Predicates
  , anyEq

    -- * Filtering
  , takeWhileNotEq
  , dropWhileNotEq
  , takeWhileEndNotEq
  , dropWhileEndEq

    -- * Splitting
    -- ** Fixed from Beginning
  , split1
  , splitTetragram1
  , split2
  , split3
  , split4

    -- * Text Manipulation
  , toLowerU
  ) where

import Control.Monad.ST (ST)
import Control.Monad.ST.Run (runByteArrayST)
import Data.Bytes.Types (Bytes (..))
import Data.Char (ord)
import Data.Primitive (ByteArray)
import Data.Word (Word8)
import System.IO (Handle, hIsEOF, stdin)

import qualified Data.ByteString.Char8 as BC8
import qualified Data.Bytes.Pure as Bytes
import qualified Data.Bytes.Byte as Byte
import qualified Data.Primitive as PM

-- | `hForLines_` over `stdin`
forLines_ :: (Bytes -> IO a) -> IO ()
{-# INLINEABLE forLines_ #-}
forLines_ :: forall a. (Bytes -> IO a) -> IO ()
forLines_ = Handle -> (Bytes -> IO a) -> IO ()
forall a. Handle -> (Bytes -> IO a) -> IO ()
hForLines_ Handle
stdin

-- | `hFoldLines` over `stdin`
foldLines :: a -> (a -> Bytes -> IO a) -> IO a
{-# INLINEABLE foldLines #-}
foldLines :: forall a. a -> (a -> Bytes -> IO a) -> IO a
foldLines = Handle -> a -> (a -> Bytes -> IO a) -> IO a
forall a. Handle -> a -> (a -> Bytes -> IO a) -> IO a
hFoldLines Handle
stdin

{- | Perform an action on each line of the input, discarding results.
To maintain a running state, see 'hFoldLines'.

Lines are extracted with with 'BC8.hGetLine', which does not document its
detection algorithm. As of writing (bytestring v0.11.1.0), lines are
delimited by a single @\n@ character (UNIX-style, as all things should be).
-}
hForLines_ :: Handle -> (Bytes -> IO a) -> IO ()
hForLines_ :: forall a. Handle -> (Bytes -> IO a) -> IO ()
hForLines_ Handle
h Bytes -> IO a
body = IO ()
loop
 where
  loop :: IO ()
loop =
    Handle -> IO Bool
hIsEOF Handle
h IO Bool -> (Bool -> IO ()) -> IO ()
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
      Bool
False -> do
        Bytes
line <- ByteString -> Bytes
Bytes.fromByteString (ByteString -> Bytes) -> IO ByteString -> IO Bytes
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Handle -> IO ByteString
BC8.hGetLine Handle
h
        a
_ <- Bytes -> IO a
body Bytes
line
        IO ()
loop
      Bool
True -> () -> IO ()
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

{- | Perform an action on each line of the input, threading state through the computation.
If you do not need to keep a state, see `hForLines_`.

Lines are extracted with with 'BC8.hGetLine', which does not document its
detection algorithm. As of writing (bytestring v0.11.1.0), lines are
delimited by a single @\n@ character (UNIX-style, as all things should be).
-}
hFoldLines :: Handle -> a -> (a -> Bytes -> IO a) -> IO a
hFoldLines :: forall a. Handle -> a -> (a -> Bytes -> IO a) -> IO a
hFoldLines Handle
h a
z a -> Bytes -> IO a
body = a -> IO a
loop a
z
 where
  loop :: a -> IO a
loop !a
x =
    Handle -> IO Bool
hIsEOF Handle
h IO Bool -> (Bool -> IO a) -> IO a
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
      Bool
False -> do
        Bytes
line <- ByteString -> Bytes
Bytes.fromByteString (ByteString -> Bytes) -> IO ByteString -> IO Bytes
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Handle -> IO ByteString
BC8.hGetLine Handle
h
        a
x' <- a -> Bytes -> IO a
body a
x Bytes
line
        a -> IO a
loop a
x'
      Bool
True -> a -> IO a
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
x

{- | /O(n)/ Convert ASCII letters to lowercase. This adds @0x20@ to bytes in the
range @[0x41,0x5A]@ (@A-Z@ ⇒ @a-z@) and leaves all other bytes alone.
Unconditionally copies the bytes.
-}
toLowerU :: Bytes -> ByteArray
toLowerU :: Bytes -> ByteArray
toLowerU (Bytes ByteArray
src Int
off0 Int
len0) =
  (forall s. ST s ByteArray) -> ByteArray
runByteArrayST ST s ByteArray
forall s. ST s ByteArray
action
 where
  action :: forall s. ST s ByteArray
  action :: forall s. ST s ByteArray
action = do
    MutableByteArray (PrimState (ST s))
dst <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray Int
len0
    let go :: Int -> Int -> t -> ST s ()
go !Int
off !Int
ix !t
len =
          if t
len t -> t -> Bool
forall a. Eq a => a -> a -> Bool
== t
0
            then () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
            else do
              let w :: Word8
w = ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray ByteArray
src Int
off :: Word8
                  w' :: Word8
w' =
                    if Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0x41 Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x5A
                      then Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
32
                      else Word8
w
              MutableByteArray (PrimState (ST s)) -> Int -> Word8 -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableByteArray (PrimState m) -> Int -> a -> m ()
PM.writeByteArray MutableByteArray (PrimState (ST s))
dst Int
ix Word8
w'
              Int -> Int -> t -> ST s ()
go (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (t
len t -> t -> t
forall a. Num a => a -> a -> a
- t
1)
    Int -> Int -> Int -> ST s ()
forall {t}. (Eq t, Num t) => Int -> Int -> t -> ST s ()
go Int
off0 Int
0 Int
len0
    MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray (PrimState (ST s))
dst

-- | Throws an exception the 'Char' argument is non-ascii.
split1 :: Char -> Bytes -> Maybe (Bytes, Bytes)
{-# INLINE split1 #-}
split1 :: Char -> Bytes -> Maybe (Bytes, Bytes)
split1 !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Maybe (Bytes, Bytes)
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.split1: argument not in ASCII range"
  | Bool
otherwise = Word8 -> Bytes -> Maybe (Bytes, Bytes)
Byte.split1 (Char -> Word8
c2w Char
c) Bytes
b

-- | Throws an exception the 'Char' argument is non-ascii.
split2 :: Char -> Bytes -> Maybe (Bytes, Bytes, Bytes)
{-# INLINE split2 #-}
split2 :: Char -> Bytes -> Maybe (Bytes, Bytes, Bytes)
split2 !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Maybe (Bytes, Bytes, Bytes)
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.split2: argument not in ASCII range"
  | Bool
otherwise = Word8 -> Bytes -> Maybe (Bytes, Bytes, Bytes)
Byte.split2 (Char -> Word8
c2w Char
c) Bytes
b

-- | Throws an exception the 'Char' argument is non-ascii.
split3 :: Char -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes)
{-# INLINE split3 #-}
split3 :: Char -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes)
split3 !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Maybe (Bytes, Bytes, Bytes, Bytes)
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.split3: argument not in ASCII range"
  | Bool
otherwise = Word8 -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes)
Byte.split3 (Char -> Word8
c2w Char
c) Bytes
b

-- | Throws an exception the 'Char' argument is non-ascii.
split4 :: Char -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes, Bytes)
{-# INLINE split4 #-}
split4 :: Char -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes, Bytes)
split4 !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Maybe (Bytes, Bytes, Bytes, Bytes, Bytes)
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.split4: argument not in ASCII range"
  | Bool
otherwise = Word8 -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes, Bytes)
Byte.split4 (Char -> Word8
c2w Char
c) Bytes
b

-- | Throws an exception if any of the 'Char' arguments are non-ascii.
splitTetragram1 :: Char -> Char -> Char -> Char -> Bytes -> Maybe (Bytes, Bytes)
{-# inline splitTetragram1 #-}
splitTetragram1 :: Char -> Char -> Char -> Char -> Bytes -> Maybe (Bytes, Bytes)
splitTetragram1 !Char
c0 !Char
c1 !Char
c2 !Char
c3 !Bytes
b
  | Char
c0 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' Bool -> Bool -> Bool
|| Char
c1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' Bool -> Bool -> Bool
|| Char
c2 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' Bool -> Bool -> Bool
|| Char
c3 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' =
      [Char] -> Maybe (Bytes, Bytes)
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.splitTetragram1: one of the characters is not in ASCII range"
  | Bool
otherwise = Word8 -> Word8 -> Word8 -> Word8 -> Bytes -> Maybe (Bytes, Bytes)
Bytes.splitTetragram1 (Char -> Word8
c2w Char
c0) (Char -> Word8
c2w Char
c1) (Char -> Word8
c2w Char
c2) (Char -> Word8
c2w Char
c3) Bytes
b

c2w :: Char -> Word8
{-# inline c2w #-}
c2w :: Char -> Word8
c2w !Char
c = forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Word8 (Char -> Int
ord Char
c)

-- | Throws an exception the 'Char' argument is non-ascii.
dropWhileNotEq :: Char -> Bytes -> Bytes
dropWhileNotEq :: Char -> Bytes -> Bytes
dropWhileNotEq !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Bytes
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.dropWhileNotEq: argument not in ASCII range"
  | Bool
otherwise =
      let !w :: Word8
w = Char -> Word8
c2w Char
c
       in Int -> Bytes -> Bytes
Bytes.unsafeDrop ((Word8 -> Bool) -> Bytes -> Int
Bytes.countWhile (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
w) Bytes
b) Bytes
b

-- | Throws an exception the 'Char' argument is non-ascii.
takeWhileNotEq :: Char -> Bytes -> Bytes
takeWhileNotEq :: Char -> Bytes -> Bytes
takeWhileNotEq !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Bytes
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.takeWhileNotEq: argument not in ASCII range"
  | Bool
otherwise =
      let !w :: Word8
w = Char -> Word8
c2w Char
c
       in Int -> Bytes -> Bytes
Bytes.unsafeTake ((Word8 -> Bool) -> Bytes -> Int
Bytes.countWhile (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
w) Bytes
b) Bytes
b
  
-- | Throws an exception the 'Char' argument is non-ascii.
takeWhileEndNotEq :: Char -> Bytes -> Bytes
takeWhileEndNotEq :: Char -> Bytes -> Bytes
takeWhileEndNotEq !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Bytes
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.takeWhileEndNotEq: argument not in ASCII range"
  | Bool
otherwise =
      let !w :: Word8
w = Char -> Word8
c2w Char
c
          !n :: Int
n = (Word8 -> Bool) -> Bytes -> Int
Bytes.countWhileEnd (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/=Word8
w) Bytes
b
       in ByteArray -> Int -> Int -> Bytes
Bytes (Bytes -> ByteArray
array Bytes
b) (Bytes -> Int
offset Bytes
b Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Bytes -> Int
Bytes.length Bytes
b Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
n) Int
n

-- | Throws an exception the 'Char' argument is non-ascii.
dropWhileEndEq :: Char -> Bytes -> Bytes
dropWhileEndEq :: Char -> Bytes -> Bytes
dropWhileEndEq !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Bytes
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.dropWhileEndEq: argument not in ASCII range"
  | Bool
otherwise =
      let !w :: Word8
w = Char -> Word8
c2w Char
c
          !n :: Int
n = (Word8 -> Bool) -> Bytes -> Int
Bytes.countWhileEnd (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==Word8
w) Bytes
b
       in Int -> Bytes -> Bytes
Bytes.unsafeTake (Bytes -> Int
Bytes.length Bytes
b Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
n) Bytes
b

-- | Throws an exception the 'Char' argument is non-ascii.
anyEq :: Char -> Bytes -> Bool
anyEq :: Char -> Bytes -> Bool
anyEq !Char
c !Bytes
b
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\DEL' = [Char] -> Bool
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.Text.AsciiExt.takeWhileNotEq: argument not in ASCII range"
  | Bool
otherwise =
      let !w :: Word8
w = Char -> Word8
c2w Char
c
       in (Word8 -> Bool) -> Bytes -> Bool
Bytes.any (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
==Word8
w) Bytes
b