{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.IO.CSV where

import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy as BL
import qualified Data.List as L
import qualified Data.Map.Strict as M
import qualified Data.Proxy as P
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.IO as TIO
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM

import Data.Csv.Streaming (Records (..))
import qualified Data.Csv.Streaming as CsvStream

import Control.Monad
import Data.Char
import qualified Data.Csv as Csv
import Data.Either
import Data.Function (on)
import Data.Functor
import Data.IORef
import Data.Maybe
import Data.Type.Equality (TestEquality (testEquality))
import Data.Word (Word8)
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame (..))
import DataFrame.Internal.Parsing
import DataFrame.Internal.Schema
import DataFrame.Operations.Typing
import System.IO
import Type.Reflection
import Prelude hiding (concat, takeWhile)

{- | Capacity, in elements, of every page allocated by the paged vector
builders in this module ('newPagedVector', 'newPagedUnboxedVector' and the
matching append functions).
-}
chunkSize :: Int
chunkSize = 16_384

{- | Append-only boxed vector that grows one fixed-size page at a time.

Elements are written into a mutable page of 'chunkSize' slots; when the page
fills up it is frozen and pushed onto 'pvChunks', and a fresh page takes its
place (see 'appendPagedVector'). 'freezePagedVector' stitches all pages
into a single immutable vector.
-}
data PagedVector a = PagedVector
    { pvChunks :: !(IORef [V.Vector a])
    -- ^ Finished chunks (reverse order)
    , pvActive :: !(IORef (VM.IOVector a))
    -- ^ Current mutable chunk
    , pvCount :: !(IORef Int)
    -- ^ Items written in current chunk
    }

{- | Unboxed counterpart of 'PagedVector': an append-only vector that grows
one page of 'chunkSize' elements at a time (see 'appendPagedUnboxedVector'
and 'freezePagedUnboxedVector').
-}
data PagedUnboxedVector a = PagedUnboxedVector
    { puvChunks :: !(IORef [VU.Vector a])
    -- ^ Finished (frozen) pages, most recently completed first.
    , puvActive :: !(IORef (VUM.IOVector a))
    -- ^ Mutable page currently being filled.
    , puvCount :: !(IORef Int)
    -- ^ Number of elements already written into the active page.
    }

{- | Mutable, per-column accumulator used while a delimited file is decoded.

Each constructor pairs a paged vector holding the column's values with a
paged 'Word8' vector. 'initializeColumns' binds that second vector as
@validityRef@, so it presumably records one validity flag per row
(NOTE(review): confirm the encoding against the code that appends to it).
-}
data BuilderColumn
    = BuilderInt !(PagedUnboxedVector Int) !(PagedUnboxedVector Word8)
    -- ^ Column whose values are accumulated as unboxed 'Int's.
    | BuilderDouble !(PagedUnboxedVector Double) !(PagedUnboxedVector Word8)
    -- ^ Column whose values are accumulated as unboxed 'Double's.
    | BuilderText !(PagedVector T.Text) !(PagedUnboxedVector Word8)
    -- ^ Column whose values are accumulated as boxed 'T.Text'.
    | BuilderBS !(PagedVector BS.ByteString) !(PagedUnboxedVector Word8)
    -- ^ Column whose values are accumulated as raw strict 'BS.ByteString's.

{- | Allocate an empty 'PagedVector': no finished chunks, one uninitialised
active page of 'chunkSize' slots, and a write count of zero.
-}
newPagedVector :: IO (PagedVector a)
newPagedVector = do
    -- The page is left uninitialised; only the first 'pvCount' slots are
    -- ever read back.
    active <- VM.unsafeNew chunkSize
    PagedVector <$> newIORef [] <*> newIORef active <*> newIORef 0

{- | Allocate an empty 'PagedUnboxedVector': no finished chunks, one
uninitialised active page of 'chunkSize' slots, and a write count of zero.
-}
newPagedUnboxedVector :: (VUM.Unbox a) => IO (PagedUnboxedVector a)
newPagedUnboxedVector = do
    -- The page is left uninitialised; only the first 'puvCount' slots are
    -- ever read back.
    active <- VUM.unsafeNew chunkSize
    PagedUnboxedVector <$> newIORef [] <*> newIORef active <*> newIORef 0

{- | Append one element (forced to WHNF by the bang pattern) to a
'PagedVector'.

If the active page still has room the value is written at the current count.
Otherwise the full page is frozen and pushed onto the finished-chunk list, a
new page of 'chunkSize' slots becomes active, and the value is stored in its
first slot.
-}
appendPagedVector :: PagedVector a -> a -> IO ()
appendPagedVector (PagedVector chunksRef activeRef countRef) !val = do
    count <- readIORef countRef
    active <- readIORef activeRef

    if count < chunkSize
        then do
            -- In bounds by the guard above, so the unchecked write is safe.
            VM.unsafeWrite active count val
            writeIORef countRef $! count + 1
        else do
            -- The full page is never written again, so freezing it in place
            -- (no copy) is safe.
            frozen <- V.unsafeFreeze active
            modifyIORef' chunksRef (frozen :)

            newActive <- VM.unsafeNew chunkSize
            VM.unsafeWrite newActive 0 val

            writeIORef activeRef newActive
            writeIORef countRef 1
{-# INLINE appendPagedVector #-}

{- | Append one element (forced to WHNF by the bang pattern) to a
'PagedUnboxedVector'.

If the active page still has room the value is written at the current count.
Otherwise the full page is frozen and pushed onto the finished-chunk list, a
new page of 'chunkSize' slots becomes active, and the value is stored in its
first slot.
-}
appendPagedUnboxedVector :: (VUM.Unbox a) => PagedUnboxedVector a -> a -> IO ()
appendPagedUnboxedVector (PagedUnboxedVector chunksRef activeRef countRef) !val = do
    count <- readIORef countRef
    active <- readIORef activeRef

    if count < chunkSize
        then do
            -- In bounds by the guard above, so the unchecked write is safe.
            VUM.unsafeWrite active count val
            writeIORef countRef $! count + 1
        else do
            -- The full page is never written again, so freezing it in place
            -- (no copy) is safe.
            frozen <- VU.unsafeFreeze active
            modifyIORef' chunksRef (frozen :)

            newActive <- VUM.unsafeNew chunkSize
            VUM.unsafeWrite newActive 0 val

            writeIORef activeRef newActive
            writeIORef countRef 1
{-# INLINE appendPagedUnboxedVector #-}

{- | Concatenate every page of a 'PagedVector' into one immutable vector.

The finished chunks are copied in insertion order, followed by the first
'pvCount' elements of the active page. The finished-chunk list is cleared
as part of the call, while the active page and its count are left as they
were, so a second freeze would only see the active page.
-}
freezePagedVector :: PagedVector a -> IO (V.Vector a)
freezePagedVector (PagedVector chunksRef activeRef countRef) = do
    count <- readIORef countRef
    active <- readIORef activeRef
    chunks <- readIORef chunksRef

    writeIORef chunksRef [] -- release chunk references
    let frozenChunks = reverse chunks -- stored newest-first; restore insertion order
        totalLen = count + sum (map V.length frozenChunks)

    mv <- VM.unsafeNew totalLen

    -- Copy one finished chunk at the given offset and return the next offset.
    let copyChunk !offset chunk = do
            V.copy (VM.slice offset (V.length chunk) mv) chunk
            pure (offset + V.length chunk)

    offset <- foldM copyChunk 0 frozenChunks
    -- Only the written prefix of the active page holds initialised data.
    VM.copy (VM.slice offset count mv) (VM.slice 0 count active)

    -- 'mv' is local and never touched again, so freezing without a copy is safe.
    V.unsafeFreeze mv

{- | Concatenate every page of a 'PagedUnboxedVector' into one immutable
unboxed vector.

The finished chunks are copied in insertion order, followed by the first
'puvCount' elements of the active page. The finished-chunk list is cleared
as part of the call, while the active page and its count are left as they
were, so a second freeze would only see the active page.
-}
freezePagedUnboxedVector ::
    (VUM.Unbox a) => PagedUnboxedVector a -> IO (VU.Vector a)
freezePagedUnboxedVector (PagedUnboxedVector chunksRef activeRef countRef) = do
    count <- readIORef countRef
    active <- readIORef activeRef
    chunks <- readIORef chunksRef

    writeIORef chunksRef [] -- release chunk references
    let frozenChunks = reverse chunks -- stored newest-first; restore insertion order
        totalLen = count + sum (map VU.length frozenChunks)

    mv <- VUM.unsafeNew totalLen

    -- Copy one finished chunk at the given offset and return the next offset.
    let copyChunk !offset chunk = do
            VU.copy (VUM.slice offset (VU.length chunk) mv) chunk
            pure (offset + VU.length chunk)

    offset <- foldM copyChunk 0 frozenChunks
    -- Only the written prefix of the active page holds initialised data.
    VUM.copy (VUM.slice offset count mv) (VUM.slice 0 count active)

    -- 'mv' is local and never touched again, so freezing without a copy is safe.
    VU.unsafeFreeze mv

-- STANDARD CONFIG TYPES

-- | Where the column names of a delimited file come from.
data HeaderSpec
    = -- | The file has no header row: the first row is data and columns
      -- are named by their zero-based position (@"0"@, @"1"@, ...).
      NoHeader
    | -- | The first row holds the column names and is not treated as data.
      UseFirstRow
    | -- | Use the supplied names. The first row is data; columns beyond
      -- the supplied names are named by their zero-based position.
      ProvideNames [T.Text]
    deriving (Eq, Show)

-- | How column types are chosen when reading a delimited file.
data TypeSpec
    = InferFromSample Int
    -- ^ Infer types from a sample; the 'Int' is the sample size reported by
    -- 'typeInferenceSampleSize'.
    | SpecifyTypes [(T.Text, SchemaType)]
    -- ^ Explicit @(column name, type)@ pairs; 'schemaTypeMap' turns these
    -- into a lookup table keyed by column name.
    | NoInference
    -- ^ Do not infer types. NOTE(review): 'initializeColumns' appears to fall
    -- back to text columns in this case — confirm against its full body.

{- | CSV read parameters.

The defaults quoted on each field are the values set by 'defaultReadOptions'.
-}
data ReadOptions = ReadOptions
    { headerSpec :: HeaderSpec
    -- ^ Where to get the headers from. (default: UseFirstRow)
    , typeSpec :: TypeSpec
    -- ^ Whether/how to infer types. (default: InferFromSample 100)
    , safeRead :: Bool
    -- ^ Whether to partially parse values into `Maybe`/`Either`. (default: True)
    , dateFormat :: String
    {- ^ Format of date fields as recognized by the Data.Time.Format module.
    (default: @"%Y-%m-%d"@)

    __Examples:__

    @
    > parseTimeM True defaultTimeLocale "%Y/%-m/%-d" "2010/3/04" :: Maybe Day
    Just 2010-03-04
    > parseTimeM True defaultTimeLocale "%d/%-m/%-Y" "04/3/2010" :: Maybe Day
    Just 2010-03-04
    @
    -}
    , columnSeparator :: Char
    -- ^ Character that separates column values. (default: @','@)
    , numColumns :: Maybe Int
    -- ^ Number of columns to read. (default: Nothing)
    , missingIndicators :: [T.Text]
    -- ^ Values that should be read as `Nothing`. (default: @[]@)
    }

{- | 'True' exactly when the spec is 'InferFromSample'; every other
'TypeSpec' yields 'False'.
-}
shouldInferFromSample :: TypeSpec -> Bool
shouldInferFromSample (InferFromSample _) = True
shouldInferFromSample _ = False

{- | Lookup table from column name to requested type.

Populated only for 'SpecifyTypes'; any other 'TypeSpec' gives an empty map.
If a name occurs more than once, 'M.fromList' keeps the last entry.
-}
schemaTypeMap :: TypeSpec -> M.Map T.Text SchemaType
schemaTypeMap (SpecifyTypes xs) = M.fromList xs
schemaTypeMap _ = M.empty

{- | Sample size carried by 'InferFromSample'; @0@ for every other
'TypeSpec'.
-}
typeInferenceSampleSize :: TypeSpec -> Int
typeInferenceSampleSize (InferFromSample n) = n
typeInferenceSampleSize _ = 0

{- | Default 'ReadOptions': comma-separated input whose first row is the
header, types inferred from a sample of 100, safe reads enabled, ISO-style
dates (@%Y-%m-%d@), no column limit and no extra missing-value markers.
-}
defaultReadOptions :: ReadOptions
defaultReadOptions =
    ReadOptions
        { headerSpec = UseFirstRow
        , typeSpec = InferFromSample 100
        , safeRead = True
        , dateFormat = "%Y-%m-%d"
        , columnSeparator = ','
        , numColumns = Nothing
        , missingIndicators = []
        }

{- | Read CSV file from path and load it into a dataframe, using
'defaultReadOptions'.

==== __Example__
@
ghci> D.readCsv ".\/data\/taxi.csv"

@
-}
readCsv :: FilePath -> IO DataFrame
readCsv = readSeparated defaultReadOptions

{- | Read CSV file from path and load it into a dataframe, using the
supplied 'ReadOptions'. The options come first, then the path.

==== __Example__
@
ghci> D.readCsvWithOpts (D.defaultReadOptions { dateFormat = "%d/%-m/%-Y" }) ".\/data\/taxi.csv"

@
-}
readCsvWithOpts :: ReadOptions -> FilePath -> IO DataFrame
readCsvWithOpts = readSeparated

{- | Read TSV (tab separated) file from path and load it into a dataframe.

Equivalent to 'readSeparated' with 'defaultReadOptions' and a tab as the
column separator.

==== __Example__
@
ghci> D.readTsv ".\/data\/taxi.tsv"

@
-}
readTsv :: FilePath -> IO DataFrame
readTsv = readSeparated (defaultReadOptions{columnSeparator = '\t'})

{- | Read text file with specified delimiter into a dataframe.

The file is read as a lazy 'BL.ByteString'; a leading UTF-8 byte-order mark
is dropped before the contents are handed to 'decodeSeparated'.

==== __Example__
@
ghci> D.readSeparated (D.defaultReadOptions { columnSeparator = ';' }) ".\/data\/taxi.txt"

@
-}
readSeparated :: ReadOptions -> FilePath -> IO DataFrame
readSeparated opts !path = do
    -- Drop the UTF-8 BOM (EF BB BF) if present; otherwise keep the input as is.
    let stripUtf8Bom bs = fromMaybe bs (BL.stripPrefix "\xEF\xBB\xBF" bs)
    csvData <- stripUtf8Bom <$> BL.readFile path
    decodeSeparated opts csvData

{- | Decode delimited data, already in memory as a lazy 'BL.ByteString',
into a dataframe.

Steps:

1. Decode the input as a stream of records using 'columnSeparator' as the
   delimiter (the decoder itself is told there is no header).
2. Take the first record and derive the column names from it according to
   'headerSpec'.
3. Peek at the first data row, create one builder per column with
   'initializeColumns', and feed all data rows through 'processStream'.
4. Finalise every builder into a column and assemble the 'DataFrame'. The
   row count is the length of the first column, or 0 when there are none.

Calls 'error' when the input has no records, when the first record (or the
first data row) fails to parse, or when the decoder reports a failure at
the end of the stream.
-}
decodeSeparated :: ReadOptions -> BL.ByteString -> IO DataFrame
decodeSeparated !opts csvData = do
    let sep = columnSeparator opts
    -- NOTE(review): the delimiter is narrowed to a single byte, so a
    -- separator outside the 8-bit range would be truncated.
    let decodeOpts = Csv.defaultDecodeOptions{Csv.decDelimiter = fromIntegral (ord sep)}
    let stream = CsvStream.decodeWith decodeOpts Csv.NoHeader csvData

    -- Split off the first record of a stream, failing loudly if there is
    -- none or if it did not parse.
    let peekStream (Cons (Right row) rest) = return (row, rest)
        peekStream (Cons (Left err) _) = error $ "Error parsing CSV header: " ++ err
        peekStream (Nil Nothing _) = error "Empty CSV file"
        peekStream (Nil (Just err) _) = error err

    (firstRowRaw, dataStream) <- peekStream stream

    let (columnNames, rowsToProcess) = case headerSpec opts of
            NoHeader ->
                -- Positional names; the first record is data, so put it back.
                ( map (T.pack . show) [0 .. V.length firstRowRaw - 1]
                , Cons (Right firstRowRaw) dataStream
                )
            UseFirstRow ->
                -- Names come from the first record (lenient UTF-8, trimmed).
                ( map (T.strip . TE.decodeUtf8Lenient . BL.toStrict) (V.toList firstRowRaw)
                , dataStream
                )
            ProvideNames ns ->
                -- Supplied names first, positional names for any remaining
                -- columns; the first record is data, so put it back.
                ( ns ++ drop (length ns) (map (T.pack . show) [0 .. V.length firstRowRaw - 1])
                , Cons (Right firstRowRaw) dataStream
                )

    -- NOTE(review): with 'UseFirstRow' and a header-only file this peek hits
    -- the end of the stream and fails with "Empty CSV file".
    (sampleRow, _) <- peekStream rowsToProcess
    builderCols <- initializeColumns columnNames (V.toList sampleRow) opts
    let !builderColsV = V.fromList builderCols
    processStream
        (missingIndicators opts)
        rowsToProcess
        builderColsV
        (numColumns opts)

    frozenCols <- V.mapM (finalizeBuilderColumn opts) builderColsV
    let numRows = maybe 0 columnLength (frozenCols V.!? 0)

    return $
        DataFrame
            frozenCols
            (M.fromList (zip columnNames [0 ..]))
            (numRows, V.length frozenCols)
            M.empty -- TODO give typed column references

initializeColumns ::
    [T.Text] -> [BL.ByteString] -> ReadOptions -> IO [BuilderColumn]
initializeColumns :: [Text] -> [LazyByteString] -> ReadOptions -> IO [BuilderColumn]
initializeColumns [Text]
names [LazyByteString]
row ReadOptions
opts = (Text -> Maybe SchemaType -> IO BuilderColumn)
-> [Text] -> [Maybe SchemaType] -> IO [BuilderColumn]
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m [c]
zipWithM Text -> Maybe SchemaType -> IO BuilderColumn
initColumn [Text]
names ((Text -> Maybe SchemaType) -> [Text] -> [Maybe SchemaType]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Maybe SchemaType
lookupType [Text]
names)
  where
    typeMap :: Map Text SchemaType
typeMap = TypeSpec -> Map Text SchemaType
schemaTypeMap (ReadOptions -> TypeSpec
typeSpec ReadOptions
opts)
    -- Return Nothing for columns that should be inferred from BS
    shouldInfer :: Bool
shouldInfer = case ReadOptions -> TypeSpec
typeSpec ReadOptions
opts of
        InferFromSample Int
_ -> Bool
True
        SpecifyTypes [(Text, SchemaType)]
_ -> Bool
True
        TypeSpec
NoInference -> Bool
False
    lookupType :: Text -> Maybe SchemaType
lookupType Text
name = Text -> Map Text SchemaType -> Maybe SchemaType
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
name Map Text SchemaType
typeMap
    initColumn :: T.Text -> Maybe SchemaType -> IO BuilderColumn
    initColumn :: Text -> Maybe SchemaType -> IO BuilderColumn
initColumn Text
_ Maybe SchemaType
Nothing | Bool
shouldInfer = do
        PagedUnboxedVector Word8
validityRef <- IO (PagedUnboxedVector Word8)
forall a. Unbox a => IO (PagedUnboxedVector a)
newPagedUnboxedVector
        PagedVector ByteString -> PagedUnboxedVector Word8 -> BuilderColumn
BuilderBS (PagedVector ByteString
 -> PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedVector ByteString)
-> IO (PagedUnboxedVector Word8 -> BuilderColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO (PagedVector ByteString)
forall a. IO (PagedVector a)
newPagedVector IO (PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedUnboxedVector Word8) -> IO BuilderColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PagedUnboxedVector Word8
validityRef
    initColumn Text
_ Maybe SchemaType
mtype = do
        PagedUnboxedVector Word8
validityRef <- IO (PagedUnboxedVector Word8)
forall a. Unbox a => IO (PagedUnboxedVector a)
newPagedUnboxedVector
        let t :: SchemaType
t = SchemaType -> Maybe SchemaType -> SchemaType
forall a. a -> Maybe a -> a
fromMaybe (forall a. Columnable a => SchemaType
schemaType @T.Text) Maybe SchemaType
mtype
        case SchemaType
t of
            SType (Proxy a
_ :: P.Proxy a) -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
                Just a :~: Int
Refl -> PagedUnboxedVector Int -> PagedUnboxedVector Word8 -> BuilderColumn
BuilderInt (PagedUnboxedVector Int
 -> PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedUnboxedVector Int)
-> IO (PagedUnboxedVector Word8 -> BuilderColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO (PagedUnboxedVector Int)
forall a. Unbox a => IO (PagedUnboxedVector a)
newPagedUnboxedVector IO (PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedUnboxedVector Word8) -> IO BuilderColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PagedUnboxedVector Word8
validityRef
                Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
                    Just a :~: Double
Refl -> PagedUnboxedVector Double
-> PagedUnboxedVector Word8 -> BuilderColumn
BuilderDouble (PagedUnboxedVector Double
 -> PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedUnboxedVector Double)
-> IO (PagedUnboxedVector Word8 -> BuilderColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO (PagedUnboxedVector Double)
forall a. Unbox a => IO (PagedUnboxedVector a)
newPagedUnboxedVector IO (PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedUnboxedVector Word8) -> IO BuilderColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PagedUnboxedVector Word8
validityRef
                    Maybe (a :~: Double)
Nothing -> PagedVector Text -> PagedUnboxedVector Word8 -> BuilderColumn
BuilderText (PagedVector Text -> PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedVector Text)
-> IO (PagedUnboxedVector Word8 -> BuilderColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IO (PagedVector Text)
forall a. IO (PagedVector a)
newPagedVector IO (PagedUnboxedVector Word8 -> BuilderColumn)
-> IO (PagedUnboxedVector Word8) -> IO BuilderColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> PagedUnboxedVector Word8 -> IO (PagedUnboxedVector Word8)
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure PagedUnboxedVector Word8
validityRef

{- | Feed a stream of parsed CSV records into the column builders.

The first argument (the list of strings to treat as missing) is passed
through unchanged to 'processRow'. The last argument is an optional row
budget: @Just k@ stops as soon as the remaining budget reaches @0@, while
'Nothing' consumes the whole stream. The budget is decremented once per
record, so a negative budget never reaches zero and also reads everything.

Both kinds of failure reported by the stream abort with a
@CSV Parse Error@: a single record that could not be converted
(@Cons (Left err) _@) and a stream that ended because the parser gave up
(@Nil (Just err) _@). Ignoring the latter would silently return a
truncated result for a malformed file.
-}
processStream ::
    [T.Text] ->
    CsvStream.Records (V.Vector BL.ByteString) ->
    V.Vector BuilderColumn ->
    Maybe Int ->
    IO ()
processStream _ _ _ (Just 0) = return ()
processStream missing (Cons (Right row) rest) cols n =
    processRow missing row cols
        >> processStream missing rest cols (fmap (subtract 1) n)
processStream _ (Cons (Left err) _) _ _ = error ("CSV Parse Error: " ++ err)
-- The stream terminated early: the first field of 'Nil' carries the reason.
processStream _ (Nil (Just err) _) _ _ = error ("CSV Parse Error: " ++ err)
-- Clean end of input.
processStream _ (Nil Nothing _) _ _ = return ()

{- | Append one CSV record to the column builders.

Field @i@ of the record is appended to builder @i@. Every builder receives
exactly one entry per record: if the record has fewer fields than there are
builders, the absent trailing fields are handled exactly like empty fields,
so all columns keep the same length. Surplus fields are ignored.

Each builder stores a value together with a validity flag (@1@ = present,
@0@ = null):

* 'BuilderInt' / 'BuilderDouble': the field is parsed; on failure a
  placeholder (@0@ / @0.0@) is stored with validity @0@. The @missing@ list
  is not consulted for these builders.
* 'BuilderText': the field is decoded leniently as UTF-8 and stripped; it is
  null when 'isNullish' holds or the text occurs in @missing@.
* 'BuilderBS': the raw bytes are stripped; the field is null when
  'isNullishBS' holds or its lenient UTF-8 decoding occurs in @missing@.
-}
processRow ::
    [T.Text] -> V.Vector BL.ByteString -> V.Vector BuilderColumn -> IO ()
processRow missing !vals !cols = V.imapM_ processField cols
  where
    -- Drive the loop from the builders (not the record) so that a short
    -- record still contributes one entry to every column.
    processField :: Int -> BuilderColumn -> IO ()
    processField !i !col = processValue (fromMaybe BL.empty (vals V.!? i)) col

    processValue :: BL.ByteString -> BuilderColumn -> IO ()
    processValue !bs !col = do
        let !bs' = BL.toStrict bs
        case col of
            BuilderInt gv valid ->
                storeParsed gv valid 0 (readByteStringInt bs')
            BuilderDouble gv valid ->
                storeParsed gv valid 0.0 (readByteStringDouble bs')
            BuilderText gv valid -> do
                let !val = T.strip (TE.decodeUtf8Lenient bs')
                if isNullish val || val `elem` missing
                    then appendPagedVector gv T.empty >> appendPagedUnboxedVector valid 0
                    else appendPagedVector gv val >> appendPagedUnboxedVector valid 1
            BuilderBS gv valid -> do
                let !bs'' = C.strip bs'
                if isNullishBS bs'' || TE.decodeUtf8Lenient bs'' `elem` missing
                    then appendPagedVector gv BS.empty >> appendPagedUnboxedVector valid 0
                    else appendPagedVector gv bs'' >> appendPagedUnboxedVector valid 1

    -- Store a parsed value with validity 1, or the placeholder with
    -- validity 0 when parsing failed.
    storeParsed ::
        (VU.Unbox a) =>
        PagedUnboxedVector a ->
        PagedUnboxedVector Word8 ->
        a ->
        Maybe a ->
        IO ()
    storeParsed gv valid placeholder parsed = case parsed of
        Just !v -> appendPagedUnboxedVector gv v >> appendPagedUnboxedVector valid 1
        Nothing -> appendPagedUnboxedVector gv placeholder >> appendPagedUnboxedVector valid 0

{- | Turn a filled builder into an immutable 'Column'.

The value store and the validity flags are frozen first. If every flag is
@1@ the values are wrapped directly ('UnboxedColumn' for Int/Double,
'BoxedColumn' for Text); otherwise the values and flags are handed to
'constructOptional' / 'constructOptionalBoxed'.

'BuilderBS' is not supported here and raises an error; such builders must go
through 'finalizeBuilderColumn'.
-}
freezeBuilderColumn :: BuilderColumn -> IO Column
freezeBuilderColumn builder = case builder of
    BuilderInt store validity -> do
        values <- freezePagedUnboxedVector store
        flags <- freezePagedUnboxedVector validity
        if hasNulls flags
            then constructOptional values flags
            else pure $! UnboxedColumn values
    BuilderDouble store validity -> do
        values <- freezePagedUnboxedVector store
        flags <- freezePagedUnboxedVector validity
        if hasNulls flags
            then constructOptional values flags
            else pure $! UnboxedColumn values
    BuilderText store validity -> do
        values <- freezePagedVector store
        flags <- freezePagedUnboxedVector validity
        if hasNulls flags
            then constructOptionalBoxed values flags
            else pure $! BoxedColumn values
    BuilderBS _ _ ->
        error
            "freezeBuilderColumn: BuilderBS must be finalized via finalizeBuilderColumn"
  where
    -- True when at least one entry is flagged as something other than 1.
    hasNulls :: VU.Vector Word8 -> Bool
    hasNulls = VU.any (/= 1)

{- | Produce the final 'Column' for a builder.

A 'BuilderBS' builder is frozen and its raw byte strings, together with the
validity flags, are passed to 'inferColumnFromBS' using the given read
options. Every other builder is delegated to 'freezeBuilderColumn', which
does not need the options.
-}
finalizeBuilderColumn :: ReadOptions -> BuilderColumn -> IO Column
finalizeBuilderColumn opts builder = case builder of
    BuilderBS store validity -> do
        raw <- freezePagedVector store
        flags <- freezePagedUnboxedVector validity
        pure $! inferColumnFromBS opts raw flags
    _ -> freezeBuilderColumn builder

{- | Infer a column type from raw byte-string cells and build the column.

Cells whose validity flag is not @1@ become 'Nothing'. A prefix of the
cells is given to 'makeParsingAssumptionBS'; the prefix length is the
configured @typeInferenceSampleSize@, with @0@ replaced by @100@. The
handler chosen from the resulting assumption then receives all cells, not
just the sample.
-}
inferColumnFromBS ::
    ReadOptions -> V.Vector BS.ByteString -> VU.Vector Word8 -> Column
inferColumnFromBS opts vec valid =
    case makeParsingAssumptionBS dfmt (V.take sampleSize cells) of
        IntAssumption -> handleBSInt dfmt cells
        DoubleAssumption -> handleBSDouble cells
        BoolAssumption -> handleBSBool cells
        DateAssumption -> handleBSDate dfmt cells
        TextAssumption -> handleBSText cells
        NoAssumption -> handleBSNo dfmt cells
  where
    dfmt = dateFormat opts

    -- A configured sample size of 0 selects the default of 100 cells.
    sampleSize :: Int
    sampleSize = case typeInferenceSampleSize (typeSpec opts) of
        0 -> 100
        n -> n

    -- Pair every raw cell with its validity flag, looked up by position.
    cells :: V.Vector (Maybe BS.ByteString)
    cells =
        V.imap
            (\i raw -> if valid VU.! i == 1 then Just raw else Nothing)
            vec

{- | Guess which type a sample of cells should be parsed as.

The candidates are tried in a fixed order and the first match wins:
all-null sample ('NoAssumption'), Bool, Int (which additionally requires the
Double check to pass), Double, Date (using the given date format), and
finally Text. A candidate matches when 'vecSameConstructor' accepts the
sample paired with its parsed counterpart (presumably: every non-null cell
parsed successfully; see 'vecSameConstructor').
-}
makeParsingAssumptionBS ::
    String -> V.Vector (Maybe BS.ByteString) -> ParsingAssumption
makeParsingAssumptionBS dfmt asMaybe
    | V.all isNothing asMaybe = NoAssumption
    | parsesWith readByteStringBool = BoolAssumption
    | parsesWith readByteStringInt && allDoubles = IntAssumption
    | allDoubles = DoubleAssumption
    | parsesWith (readByteStringDate dfmt) = DateAssumption
    | otherwise = TextAssumption
  where
    -- Parse every non-null cell with the reader and compare the outcome
    -- with the input via 'vecSameConstructor'.
    parsesWith :: (BS.ByteString -> Maybe b) -> Bool
    parsesWith reader = vecSameConstructor asMaybe (V.map (>>= reader) asMaybe)

    -- Shared by the Int and Double guards.
    allDoubles :: Bool
    allDoubles = parsesWith readByteStringDouble

{- | Build a column from cells that are expected to hold booleans.

Every non-null cell is parsed with 'readByteStringBool'. If
'vecSameConstructor' accepts the cells paired with the parse results, the
column holds plain @Bool@ values when no cell is null and @Maybe Bool@
values otherwise. If it rejects them, the cells are turned into a text
column via 'handleBSText'.
-}
handleBSBool :: V.Vector (Maybe BS.ByteString) -> Column
handleBSBool asMaybe =
    if vecSameConstructor asMaybe parsed
        then case sequenceA parsed of
            Just complete -> fromVector complete
            Nothing -> fromVector parsed
        else handleBSText asMaybe
  where
    parsed :: V.Vector (Maybe Bool)
    parsed = V.map (>>= readByteStringBool) asMaybe

{- | Build a column from cells that are expected to hold integers.

The cells are parsed both as Int and as Double. When 'vecSameConstructor'
accepts both parses, an Int column is produced; when it accepts only the
Double parse, a Double column is produced; otherwise the cells become a
text column via 'handleBSText'. In the numeric cases the column holds plain
values when no cell is null and @Maybe@ values otherwise.

The date-format argument is accepted but not used.
-}
handleBSInt :: String -> V.Vector (Maybe BS.ByteString) -> Column
handleBSInt _dfmt asMaybe
    | matchesInts && matchesDoubles =
        case sequenceA ints of
            Just complete -> fromVector complete
            Nothing -> fromVector ints
    | matchesDoubles =
        case sequenceA doubles of
            Just complete -> fromVector complete
            Nothing -> fromVector doubles
    | otherwise = handleBSText asMaybe
  where
    ints :: V.Vector (Maybe Int)
    ints = V.map (>>= readByteStringInt) asMaybe

    doubles :: V.Vector (Maybe Double)
    doubles = V.map (>>= readByteStringDouble) asMaybe

    matchesInts :: Bool
    matchesInts = vecSameConstructor asMaybe ints

    matchesDoubles :: Bool
    matchesDoubles = vecSameConstructor asMaybe doubles

{- | Build a column from cells that are expected to hold doubles.

Every non-null cell is parsed with 'readByteStringDouble'. If
'vecSameConstructor' accepts the cells paired with the parse results, the
column holds plain @Double@ values when no cell is null and @Maybe Double@
values otherwise. If it rejects them, the cells are turned into a text
column via 'handleBSText'.
-}
handleBSDouble :: V.Vector (Maybe BS.ByteString) -> Column
handleBSDouble asMaybe =
    if vecSameConstructor asMaybe parsed
        then case sequenceA parsed of
            Just complete -> fromVector complete
            Nothing -> fromVector parsed
        else handleBSText asMaybe
  where
    parsed :: V.Vector (Maybe Double)
    parsed = V.map (>>= readByteStringDouble) asMaybe

{- | Build a column from cells that are expected to hold dates.

Every non-null cell is parsed with 'readByteStringDate' using the given
date format. If 'vecSameConstructor' accepts the cells paired with the parse
results, the column holds plain date values when no cell is null and
@Maybe@ values otherwise. If it rejects them, the cells are turned into a
text column via 'handleBSText'.
-}
handleBSDate :: String -> V.Vector (Maybe BS.ByteString) -> Column
handleBSDate dfmt asMaybe =
    if vecSameConstructor asMaybe parsed
        then case sequenceA parsed of
            Just complete -> fromVector complete
            Nothing -> fromVector parsed
        else handleBSText asMaybe
  where
    parsed = V.map (>>= readByteStringDate dfmt) asMaybe

{- | Build a text column from raw byte-string cells.

Each non-null cell is decoded with 'TE.decodeUtf8Lenient'. The column holds
plain @Text@ values when no cell is null and @Maybe Text@ values otherwise.
-}
handleBSText :: V.Vector (Maybe BS.ByteString) -> Column
handleBSText asMaybe = case sequenceA decoded of
    Just complete -> fromVector complete
    Nothing -> fromVector decoded
  where
    decoded :: V.Vector (Maybe T.Text)
    decoded = V.map (fmap TE.decodeUtf8Lenient) asMaybe

{- | Infer a column type from optional raw byte-string cells.

Candidate parses are tried in the order Bool, Int, Double, date (using the
format string @dfmt@); the first one accepted by 'vecSameConstructor' against
the raw cells wins. If none is accepted the cells are handed to
'handleBSText'. A vector consisting only of 'Nothing' becomes a column of
@Nothing :: Maybe Text@.

For each accepted parse, the column holds unwrapped values when no cell is
missing and @Maybe@ values otherwise.
-}
handleBSNo :: String -> V.Vector (Maybe BS.ByteString) -> Column
handleBSNo dfmt cells
    | V.all isNothing cells = fromVector ((Nothing :: Maybe T.Text) <$ cells)
    | boolsFit = maybe (fromVector bools) fromVector (sequence bools)
    | intsFit = maybe (fromVector ints) fromVector (sequence ints)
    | doublesFit = maybe (fromVector doubles) fromVector (sequence doubles)
    | datesFit = maybe (fromVector dates) fromVector (sequence dates)
    | otherwise = handleBSText cells
  where
    -- Candidate parses; a missing cell stays missing.
    bools = V.map (readByteStringBool =<<) cells
    ints = V.map (readByteStringInt =<<) cells
    doubles = V.map (readByteStringDouble =<<) cells
    dates = V.map (readByteStringDate dfmt =<<) cells

    -- Acceptance checks for each candidate.
    boolsFit = vecSameConstructor cells bools
    doublesFit = vecSameConstructor cells doubles
    -- The Int candidate is only accepted when the Double candidate is too.
    intsFit = vecSameConstructor cells ints && doublesFit
    datesFit = vecSameConstructor cells dates

{- | Combine an unboxed value vector with a validity mask into an optional column.

Slot @i@ of the result is 'Nothing' when @valid VU.! i@ is @0@ and
@Just (vec VU.! i)@ otherwise. The result has the same length as @vec@;
@valid@ is indexed at every position of @vec@.
-}
constructOptional ::
    (VU.Unbox a, Columnable a) => VU.Vector a -> VU.Vector Word8 -> IO Column
constructOptional vec valid = OptionalColumn <$> V.generateM (VU.length vec) slot
  where
    -- The mask lookup happens while the action runs; the payload stays lazy.
    slot i
        | valid VU.! i == 0 = pure Nothing
        | otherwise = pure (Just (vec VU.! i))

{- | Combine a boxed text vector with a validity mask into an optional column.

Slot @i@ of the result is 'Nothing' when @valid VU.! i@ is @0@ and
@Just@ the text at position @i@ otherwise. The result has the same length as
@vec@; @valid@ is indexed at every position of @vec@.
-}
constructOptionalBoxed :: V.Vector T.Text -> VU.Vector Word8 -> IO Column
constructOptionalBoxed vec valid = OptionalColumn <$> V.imapM slot vec
  where
    -- The mask lookup happens while the action runs.
    slot i txt
        | valid VU.! i == 0 = pure Nothing
        | otherwise = pure (Just txt)

-- | Write a dataframe to @path@ via 'writeSeparated' with a comma separator.
writeCsv :: FilePath -> DataFrame -> IO ()
writeCsv path df = writeSeparated ',' path df

-- | Write a dataframe to @path@ via 'writeSeparated' with a tab separator.
writeTsv :: FilePath -> DataFrame -> IO ()
writeTsv path df = writeSeparated '\t' path df

{- | Write a dataframe to a file as delimiter-separated text.

The first line holds the column names ordered by their column index; each
following line holds one row as produced by 'getRowAsText'. Fields on every
line, including the header, are joined with the given separator.

Fields are written verbatim: no quoting or escaping is applied, so a value
that contains the separator or a newline is not protected.
-}
writeSeparated ::
    -- | Separator
    Char ->
    -- | Path to write to
    FilePath ->
    DataFrame ->
    IO ()
writeSeparated c filepath df = withFile filepath WriteMode $ \handle -> do
    let (rows, _) = dataframeDimensions df
        -- Honour the requested separator (it used to be ignored in favour of
        -- a hard-coded comma, which made 'writeTsv' emit comma-separated data).
        sep = T.singleton c
        headers = map fst (L.sortOn snd (M.toList (columnIndices df)))
        putRow = TIO.hPutStrLn handle . T.intercalate sep
    putRow headers
    forM_ [0 .. rows - 1] $ \i -> putRow (getRowAsText df i)

{- | Render row @i@ of a dataframe as one text field per column.

Rendering rules, per column constructor:

* 'BoxedColumn': 'T.Text' values are emitted unchanged. @Maybe Text@ values
  are unwrapped, with @"null"@ for 'Nothing'. Any other @Maybe@ value is
  rendered with 'show' and its @"Just "@ prefix removed, or @"null"@ when the
  prefix is absent. Everything else is rendered with 'show'.
* 'UnboxedColumn': values are rendered with 'show'.
* 'OptionalColumn': 'Nothing' becomes the empty text; present values are
  emitted unchanged for 'T.Text' and rendered with 'show' otherwise.

Calls 'error' when a column has no element at index @i@.
-}
getRowAsText :: DataFrame -> Int -> [T.Text]
getRowAsText df i = V.ifoldr go [] (columns df)
  where
    -- Column position -> column name, used only for error reporting.
    nameOf = M.fromList [(pos, name) | (name, pos) <- M.toList (columnIndices df)]

    -- Raised when the column at position @k@ has no element at row @i@.
    tooShort :: Int -> [T.Text]
    tooShort k =
        error $
            "Column "
                ++ T.unpack (nameOf M.! k)
                ++ " has less items than "
                ++ "the other columns at index "
                ++ show i

    -- Strip the constructor from a shown @Just x@; anything else is "null".
    unwrapShown :: T.Text -> T.Text
    unwrapShown shownValue = fromMaybe "null" (T.stripPrefix "Just " shownValue)

    go :: Int -> Column -> [T.Text] -> [T.Text]
    go k (BoxedColumn (c :: V.Vector a)) acc = case c V.!? i of
        Nothing -> tooShort k
        Just e ->
            let shown :: T.Text
                shown = T.pack (show e)
                rendered :: T.Text
                rendered = case testEquality (typeRep @a) (typeRep @T.Text) of
                    Just Refl -> e
                    Nothing -> case typeRep @a of
                        App con arg
                            | Just HRefl <- eqTypeRep con (typeRep @Maybe) ->
                                case testEquality arg (typeRep @T.Text) of
                                    Just Refl -> fromMaybe "null" e
                                    Nothing -> unwrapShown shown
                        _ -> shown
             in rendered : acc
    go k (UnboxedColumn c) acc = case c VU.!? i of
        Nothing -> tooShort k
        Just e -> T.pack (show e) : acc
    go k (OptionalColumn (c :: V.Vector (Maybe a))) acc = case c V.!? i of
        Nothing -> tooShort k
        Just e ->
            let rendered :: T.Text
                rendered = case testEquality (typeRep @a) (typeRep @T.Text) of
                    Just Refl -> fromMaybe T.empty e
                    Nothing -> maybe T.empty (T.pack . show) e
             in rendered : acc

{- | Remove one pair of enclosing double quotes from a text value.

The text is returned unchanged unless it both starts and ends with a double
quote and the two are distinct characters (a lone @"@ is left as is).
-}
stripQuotes :: T.Text -> T.Text
stripQuotes txt = fromMaybe txt (T.stripPrefix quote txt >>= T.stripSuffix quote)
  where
    quote = T.singleton '"'