{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CApiFFI #-}
{-# LANGUAGE ForeignFunctionInterface #-}
module DataFrame.IO.Unstable.CSV (
fastReadCsvUnstable,
readCsvUnstable,
) where
import qualified Data.Vector as Vector
import qualified Data.Vector.Storable as VS
import Data.Vector.Storable.Mutable (
grow,
unsafeFromForeignPtr,
)
import qualified Data.Vector.Storable.Mutable as VSM
import System.IO.MMap (
Mode (WriteCopy),
mmapFileForeignPtr,
)
import Foreign (
Ptr,
castForeignPtr,
castPtr,
mallocArray,
newForeignPtr_,
)
import Foreign.C.Types
import qualified Data.ByteString as BS
import Data.ByteString.Internal (ByteString (PS))
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Encoding as TextEncoding
import Data.Word (Word8)
import Control.Parallel.Strategies (parList, rpar, using)
import Data.Array.IArray (array, (!))
import Data.Array.Unboxed (UArray)
import Data.Ix (range)
import DataFrame.IO.CSV (
HeaderSpec (..),
ReadOptions (..),
defaultReadOptions,
shouldInferFromSample,
typeInferenceSampleSize,
)
import DataFrame.Internal.DataFrame (DataFrame (..))
import DataFrame.Operations.Typing (parseFromExamples)
fastReadCsvUnstable :: FilePath -> IO DataFrame
fastReadCsvUnstable :: FilePath -> IO DataFrame
fastReadCsvUnstable =
ReadOptions
-> (Int -> Vector Word8 -> IO (Vector CSize))
-> FilePath
-> IO DataFrame
readCsvUnstable'
ReadOptions
defaultReadOptions
Int -> Vector Word8 -> IO (Vector CSize)
getDelimiterIndices
readCsvUnstable :: FilePath -> IO DataFrame
readCsvUnstable :: FilePath -> IO DataFrame
readCsvUnstable =
ReadOptions
-> (Int -> Vector Word8 -> IO (Vector CSize))
-> FilePath
-> IO DataFrame
readCsvUnstable'
ReadOptions
defaultReadOptions
( \Int
originalLen Vector Word8
v -> do
Ptr CSize
indices <- Int -> IO (Ptr CSize)
forall a. Storable a => Int -> IO (Ptr a)
mallocArray Int
originalLen
Int -> Vector Word8 -> Ptr CSize -> IO (Vector CSize)
getDelimiterIndices_ Int
originalLen Vector Word8
v Ptr CSize
indices
)
readCsvUnstable' ::
ReadOptions ->
(Int -> VS.Vector Word8 -> IO (VS.Vector CSize)) ->
FilePath ->
IO DataFrame
readCsvUnstable' :: ReadOptions
-> (Int -> Vector Word8 -> IO (Vector CSize))
-> FilePath
-> IO DataFrame
readCsvUnstable' ReadOptions
opts Int -> Vector Word8 -> IO (Vector CSize)
delimiterIndices FilePath
filePath = do
(ForeignPtr Word8
bufferPtr, Int
offset, Int
len) <-
FilePath
-> Mode -> Maybe (Int64, Int) -> IO (ForeignPtr Word8, Int, Int)
forall a.
FilePath
-> Mode -> Maybe (Int64, Int) -> IO (ForeignPtr a, Int, Int)
mmapFileForeignPtr
FilePath
filePath
Mode
WriteCopy
Maybe (Int64, Int)
forall a. Maybe a
Nothing
let mutableFile :: MVector s Word8
mutableFile = ForeignPtr Word8 -> Int -> Int -> MVector s Word8
forall a s. Storable a => ForeignPtr a -> Int -> Int -> MVector s a
unsafeFromForeignPtr ForeignPtr Word8
bufferPtr Int
offset Int
len
MVector RealWorld Word8
paddedMutableFile <- MVector (PrimState IO) Word8
-> Int -> IO (MVector (PrimState IO) Word8)
forall (m :: * -> *) a.
(PrimMonad m, Storable a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
grow MVector RealWorld Word8
MVector (PrimState IO) Word8
forall {s}. MVector s Word8
mutableFile Int
64
Vector Word8
paddedCSVFile <- MVector (PrimState IO) Word8 -> IO (Vector Word8)
forall a (m :: * -> *).
(Storable a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VS.unsafeFreeze MVector RealWorld Word8
MVector (PrimState IO) Word8
paddedMutableFile
Vector CSize
indices <- Int -> Vector Word8 -> IO (Vector CSize)
delimiterIndices Int
len Vector Word8
paddedCSVFile
let numCol :: Int
numCol = Vector Word8 -> Vector CSize -> Int
countColumnsInFirstRow Vector Word8
paddedCSVFile Vector CSize
indices
totalRows :: Int
totalRows = Vector CSize -> Int
forall a. Storable a => Vector a -> Int
VS.length Vector CSize
indices Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
numCol
extractField' :: Int -> Text
extractField' = Vector Word8 -> Vector CSize -> Int -> Text
extractField Vector Word8
paddedCSVFile Vector CSize
indices
(Vector Text
columnNames, Int
dataStartRow) = case ReadOptions -> HeaderSpec
headerSpec ReadOptions
opts of
HeaderSpec
NoHeader ->
( [Text] -> Vector Text
forall a. [a] -> Vector a
Vector.fromList ([Text] -> Vector Text) -> [Text] -> Vector Text
forall a b. (a -> b) -> a -> b
$
(Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (FilePath -> Text
Text.pack (FilePath -> Text) -> (Int -> FilePath) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> FilePath
forall a. Show a => a -> FilePath
show) [Int
0 .. Int
numCol Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
, Int
0
)
HeaderSpec
UseFirstRow ->
( [Text] -> Vector Text
forall a. [a] -> Vector a
Vector.fromList ([Text] -> Vector Text) -> [Text] -> Vector Text
forall a b. (a -> b) -> a -> b
$
(Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Text
extractField' [Int
0 .. Int
numCol Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
, Int
1
)
ProvideNames [Text]
ns ->
([Text] -> Vector Text
forall a. [a] -> Vector a
Vector.fromList [Text]
ns, Int
0)
numRow :: Int
numRow = Int
totalRows Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
dataStartRow
parseTypes :: Vector Text -> Column
parseTypes Vector Text
col =
let n :: Int
n =
if TypeSpec -> Bool
shouldInferFromSample (ReadOptions -> TypeSpec
typeSpec ReadOptions
opts)
then TypeSpec -> Int
typeInferenceSampleSize (ReadOptions -> TypeSpec
typeSpec ReadOptions
opts)
else Int
0
in Int -> FilePath -> Bool -> Vector Text -> Column
parseFromExamples
Int
n
(ReadOptions -> FilePath
dateFormat ReadOptions
opts)
(ReadOptions -> Bool
safeRead ReadOptions
opts)
Vector Text
col
generateColumn :: Int -> Column
generateColumn Int
col =
Vector Text -> Column
parseTypes (Vector Text -> Column) -> Vector Text -> Column
forall a b. (a -> b) -> a -> b
$
Int -> [Text] -> Vector Text
forall a. Int -> [a] -> Vector a
Vector.fromListN
Int
numRow
( (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map
( \Int
row ->
Int -> Text
extractField'
(Int
row Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
numCol Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
col)
)
[Int
dataStartRow .. Int
totalRows Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
)
columns :: Vector Column
columns =
Int -> [Column] -> Vector Column
forall a. Int -> [a] -> Vector a
Vector.fromListN
Int
numCol
( (Int -> Column) -> [Int] -> [Column]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Column
generateColumn [Int
0 .. Int
numCol Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
[Column] -> Strategy [Column] -> [Column]
forall a. a -> Strategy a -> a
`using` Strategy Column -> Strategy [Column]
forall a. Strategy a -> Strategy [a]
parList Strategy Column
forall a. Strategy a
rpar
)
columnIndices :: Map Text Int
columnIndices =
[(Text, Int)] -> Map Text Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, Int)] -> Map Text Int) -> [(Text, Int)] -> Map Text Int
forall a b. (a -> b) -> a -> b
$
[Text] -> [Int] -> [(Text, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Vector Text -> [Text]
forall a. Vector a -> [a]
Vector.toList Vector Text
columnNames) [Int
0 ..]
dataframeDimensions :: (Int, Int)
dataframeDimensions = (Int
numRow, Int
numCol)
DataFrame -> IO DataFrame
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (DataFrame -> IO DataFrame) -> DataFrame -> IO DataFrame
forall a b. (a -> b) -> a -> b
$
Vector Column -> Map Text Int -> (Int, Int) -> DataFrame
DataFrame Vector Column
columns Map Text Int
columnIndices (Int, Int)
dataframeDimensions
{-# INLINE extractField #-}
extractField ::
VS.Vector Word8 ->
VS.Vector CSize ->
Int ->
Text
Vector Word8
file Vector CSize
indices Int
position =
ByteString -> Text
TextEncoding.decodeUtf8Lenient
(ByteString -> Text)
-> (Vector Word8 -> ByteString) -> Vector Word8 -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Word8 -> ByteString
unsafeToByteString
(Vector Word8 -> Text) -> Vector Word8 -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Vector Word8 -> Vector Word8
forall a. Storable a => Int -> Int -> Vector a -> Vector a
VS.slice
Int
previous
(Int
next Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
previous)
Vector Word8
file
where
previous :: Int
previous =
if Int
position Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then Int
0
else Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector CSize
indices Vector CSize -> Int -> CSize
forall a. Storable a => Vector a -> Int -> a
VS.! (Int
position Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))
next :: Int
next = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSize -> Int) -> CSize -> Int
forall a b. (a -> b) -> a -> b
$ Vector CSize
indices Vector CSize -> Int -> CSize
forall a. Storable a => Vector a -> Int -> a
VS.! Int
position
unsafeToByteString :: VS.Vector Word8 -> BS.ByteString
unsafeToByteString :: Vector Word8 -> ByteString
unsafeToByteString Vector Word8
v = ForeignPtr Word8 -> Int -> Int -> ByteString
PS (ForeignPtr Word8 -> ForeignPtr Word8
forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr ForeignPtr Word8
ptr) Int
0 Int
len
where
(ForeignPtr Word8
ptr, Int
len) = Vector Word8 -> (ForeignPtr Word8, Int)
forall a. Vector a -> (ForeignPtr a, Int)
VS.unsafeToForeignPtr0 Vector Word8
v
foreign import capi "process_csv.h get_delimiter_indices"
get_delimiter_indices ::
Ptr CUChar ->
CSize ->
Ptr CSize ->
IO CSize
{-# INLINE getDelimiterIndices #-}
getDelimiterIndices ::
Int ->
VS.Vector Word8 ->
IO (VS.Vector CSize)
getDelimiterIndices :: Int -> Vector Word8 -> IO (Vector CSize)
getDelimiterIndices Int
originalLen Vector Word8
csvFile =
Vector Word8
-> (Ptr Word8 -> IO (Vector CSize)) -> IO (Vector CSize)
forall a b. Storable a => Vector a -> (Ptr a -> IO b) -> IO b
VS.unsafeWith Vector Word8
csvFile ((Ptr Word8 -> IO (Vector CSize)) -> IO (Vector CSize))
-> (Ptr Word8 -> IO (Vector CSize)) -> IO (Vector CSize)
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
buffer -> do
let paddedLen :: Int
paddedLen = Vector Word8 -> Int
forall a. Storable a => Vector a -> Int
VS.length Vector Word8
csvFile
Ptr CSize
indices <- Int -> IO (Ptr CSize)
forall a. Storable a => Int -> IO (Ptr a)
mallocArray Int
paddedLen
CSize
num_fields <-
Ptr CUChar -> CSize -> Ptr CSize -> IO CSize
get_delimiter_indices
(Ptr Word8 -> Ptr CUChar
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
buffer)
(Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
paddedLen)
(Ptr CSize -> Ptr CSize
forall a b. Ptr a -> Ptr b
castPtr Ptr CSize
indices)
if CSize
num_fields CSize -> CSize -> Bool
forall a. Eq a => a -> a -> Bool
== -CSize
1
then Int -> Vector Word8 -> Ptr CSize -> IO (Vector CSize)
getDelimiterIndices_ Int
originalLen Vector Word8
csvFile Ptr CSize
indices
else do
ForeignPtr CSize
indices' <- Ptr CSize -> IO (ForeignPtr CSize)
forall a. Ptr a -> IO (ForeignPtr a)
newForeignPtr_ Ptr CSize
indices
let resultVector :: MVector s CSize
resultVector = ForeignPtr CSize -> Int -> MVector s CSize
forall a s. ForeignPtr a -> Int -> MVector s a
VSM.unsafeFromForeignPtr0 ForeignPtr CSize
indices' Int
paddedLen
Int
finalResultLen <-
if Int
originalLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&& Vector Word8
csvFile Vector Word8 -> Int -> Word8
forall a. Storable a => Vector a -> Int -> a
VS.! (Int
originalLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
lf
then do
MVector (PrimState IO) CSize -> Int -> CSize -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Storable a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VSM.write MVector RealWorld CSize
MVector (PrimState IO) CSize
forall {s}. MVector s CSize
resultVector (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
num_fields) (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
originalLen)
Int -> IO Int
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
num_fields Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
else Int -> IO Int
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
num_fields)
MVector (PrimState IO) CSize -> IO (Vector CSize)
forall a (m :: * -> *).
(Storable a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VS.unsafeFreeze (MVector (PrimState IO) CSize -> IO (Vector CSize))
-> MVector (PrimState IO) CSize -> IO (Vector CSize)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> MVector RealWorld CSize -> MVector RealWorld CSize
forall a s. Storable a => Int -> Int -> MVector s a -> MVector s a
VSM.slice Int
0 Int
finalResultLen MVector RealWorld CSize
forall {s}. MVector s CSize
resultVector
lf, cr, comma, quote :: Word8
lf :: Word8
lf = Word8
0x0A
cr :: Word8
cr = Word8
0x0D
comma :: Word8
comma = Word8
0x2C
quote :: Word8
quote = Word8
0x22
data State
= UnEscaped
| Escaped
deriving (Int -> State
State -> Int
State -> [State]
State -> State
State -> State -> [State]
State -> State -> State -> [State]
(State -> State)
-> (State -> State)
-> (Int -> State)
-> (State -> Int)
-> (State -> [State])
-> (State -> State -> [State])
-> (State -> State -> [State])
-> (State -> State -> State -> [State])
-> Enum State
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
$csucc :: State -> State
succ :: State -> State
$cpred :: State -> State
pred :: State -> State
$ctoEnum :: Int -> State
toEnum :: Int -> State
$cfromEnum :: State -> Int
fromEnum :: State -> Int
$cenumFrom :: State -> [State]
enumFrom :: State -> [State]
$cenumFromThen :: State -> State -> [State]
enumFromThen :: State -> State -> [State]
$cenumFromTo :: State -> State -> [State]
enumFromTo :: State -> State -> [State]
$cenumFromThenTo :: State -> State -> State -> [State]
enumFromThenTo :: State -> State -> State -> [State]
Enum)
{-# INLINE stateTransitionTable #-}
stateTransitionTable :: UArray (Int, Word8) Int
stateTransitionTable :: UArray (Int, Word8) Int
stateTransitionTable = ((Int, Word8), (Int, Word8))
-> [((Int, Word8), Int)] -> UArray (Int, Word8) Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array ((Int
0, Word8
0), (Int
1, Word8
255)) [((Int, Word8)
i, (Int, Word8) -> Int
forall {a}. (Eq a, Num a) => (Int, a) -> Int
f (Int, Word8)
i) | (Int, Word8)
i <- ((Int, Word8), (Int, Word8)) -> [(Int, Word8)]
forall a. Ix a => (a, a) -> [a]
range ((Int
0, Word8
0), (Int
1, Word8
255))]
where
f :: (Int, a) -> Int
f (Int
0, a
0x0A) = State -> Int
forall a. Enum a => a -> Int
fromEnum State
UnEscaped
f (Int
0, a
0x2C) = State -> Int
forall a. Enum a => a -> Int
fromEnum State
UnEscaped
f (Int
0, a
0x22) = State -> Int
forall a. Enum a => a -> Int
fromEnum State
Escaped
f (Int
1, a
0x22) = State -> Int
forall a. Enum a => a -> Int
fromEnum State
UnEscaped
f (Int
state, a
_) = Int
state
{-# INLINE getDelimiterIndices_ #-}
getDelimiterIndices_ ::
Int ->
VS.Vector Word8 ->
Ptr CSize ->
IO (VS.Vector CSize)
getDelimiterIndices_ :: Int -> Vector Word8 -> Ptr CSize -> IO (Vector CSize)
getDelimiterIndices_ Int
originalLen Vector Word8
csvFile Ptr CSize
resultPtr = do
MVector RealWorld CSize
resultVector <- IO (MVector RealWorld CSize)
forall {s}. IO (MVector s CSize)
resultVectorM
(State
_, Int
resultLen) <-
((State, Int) -> Int -> Word8 -> IO (State, Int))
-> (State, Int) -> Vector Word8 -> IO (State, Int)
forall (m :: * -> *) b a.
(Monad m, Storable b) =>
(a -> Int -> b -> m a) -> a -> Vector b -> m a
VS.ifoldM'
(MVector RealWorld CSize
-> (State, Int) -> Int -> Word8 -> IO (State, Int)
processCharacter MVector RealWorld CSize
resultVector)
(State
UnEscaped, Int
0)
Vector Word8
csvFile
Int
finalResultLen <-
if Int
originalLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&& Vector Word8
csvFile Vector Word8 -> Int -> Word8
forall a. Storable a => Vector a -> Int -> a
VS.! (Int
originalLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
lf
then do
MVector (PrimState IO) CSize -> Int -> CSize -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Storable a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VSM.write MVector RealWorld CSize
MVector (PrimState IO) CSize
resultVector Int
resultLen (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
originalLen)
Int -> IO Int
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
resultLen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
else Int -> IO Int
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Int
resultLen
MVector (PrimState IO) CSize -> IO (Vector CSize)
forall a (m :: * -> *).
(Storable a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VS.unsafeFreeze (MVector (PrimState IO) CSize -> IO (Vector CSize))
-> MVector (PrimState IO) CSize -> IO (Vector CSize)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> MVector RealWorld CSize -> MVector RealWorld CSize
forall a s. Storable a => Int -> Int -> MVector s a -> MVector s a
VSM.slice Int
0 Int
finalResultLen MVector RealWorld CSize
resultVector
where
paddedLen :: Int
paddedLen = Vector Word8 -> Int
forall a. Storable a => Vector a -> Int
VS.length Vector Word8
csvFile
resultVectorM :: IO (MVector s CSize)
resultVectorM = do
ForeignPtr CSize
resultForeignPtr <- Ptr CSize -> IO (ForeignPtr CSize)
forall a. Ptr a -> IO (ForeignPtr a)
newForeignPtr_ Ptr CSize
resultPtr
MVector s CSize -> IO (MVector s CSize)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (MVector s CSize -> IO (MVector s CSize))
-> MVector s CSize -> IO (MVector s CSize)
forall a b. (a -> b) -> a -> b
$ ForeignPtr CSize -> Int -> MVector s CSize
forall a s. ForeignPtr a -> Int -> MVector s a
VSM.unsafeFromForeignPtr0 ForeignPtr CSize
resultForeignPtr Int
paddedLen
processCharacter ::
VSM.IOVector CSize ->
(State, Int) ->
Int ->
Word8 ->
IO (State, Int)
processCharacter :: MVector RealWorld CSize
-> (State, Int) -> Int -> Word8 -> IO (State, Int)
processCharacter
MVector RealWorld CSize
resultVector
(!State
state, !Int
resultIndex)
Int
index
Word8
character =
case State
state of
State
UnEscaped ->
if Word8
character Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
lf Bool -> Bool -> Bool
|| Word8
character Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
comma
then do
MVector (PrimState IO) CSize -> Int -> CSize -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Storable a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VSM.write
MVector RealWorld CSize
MVector (PrimState IO) CSize
resultVector
Int
resultIndex
(Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
index)
(State, Int) -> IO (State, Int)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (State
newState, Int
resultIndex Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
else (State, Int) -> IO (State, Int)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (State
newState, Int
resultIndex)
State
Escaped -> (State, Int) -> IO (State, Int)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (State
newState, Int
resultIndex)
where
newState :: State
newState =
Int -> State
forall a. Enum a => Int -> a
toEnum (Int -> State) -> Int -> State
forall a b. (a -> b) -> a -> b
$
UArray (Int, Word8) Int
stateTransitionTable
UArray (Int, Word8) Int -> (Int, Word8) -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! (State -> Int
forall a. Enum a => a -> Int
fromEnum State
state, Word8
character)
{-# INLINE countColumnsInFirstRow #-}
countColumnsInFirstRow ::
VS.Vector Word8 ->
VS.Vector CSize ->
Int
countColumnsInFirstRow :: Vector Word8 -> Vector CSize -> Int
countColumnsInFirstRow Vector Word8
file Vector CSize
indices
| Vector CSize -> Int
forall a. Storable a => Vector a -> Int
VS.length Vector CSize
indices Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Int
0
| Bool
otherwise =
Int
1
Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector CSize -> Int
forall a. Storable a => Vector a -> Int
VS.length
( (CSize -> Bool) -> Vector CSize -> Vector CSize
forall a. Storable a => (a -> Bool) -> Vector a -> Vector a
VS.takeWhile
(\CSize
i -> Vector Word8
file Vector Word8 -> Int -> Word8
forall a. Storable a => Vector a -> Int -> a
VS.! CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
i Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
lf)
Vector CSize
indices
)