{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.IO.CSV where
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Lazy as BL
import qualified Data.List as L
import qualified Data.Map.Strict as M
import qualified Data.Proxy as P
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.IO as TIO
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import Data.Csv.Streaming (Records (..))
import qualified Data.Csv.Streaming as CsvStream
import Control.Monad
import Data.Char
import qualified Data.Csv as Csv
import Data.Either
import Data.Function (on)
import Data.Functor
import Data.IORef
import Data.Maybe
import Data.Type.Equality (TestEquality (testEquality))
import Data.Word (Word8)
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame (..))
import DataFrame.Internal.Parsing
import DataFrame.Internal.Schema
import DataFrame.Operations.Typing
import System.IO
import Type.Reflection
import Prelude hiding (concat, takeWhile)
-- | Number of slots in every page allocated by the paged vector builders.
chunkSize :: Int
chunkSize = 16 * 1024
-- | Append-only builder for boxed vectors that grows one fixed-size page
-- ('chunkSize' slots) at a time.
data PagedVector a = PagedVector
    { pvChunks :: !(IORef [V.Vector a])
    -- ^ Pages that have been filled and frozen; 'appendPagedVector' conses
    -- each newly filled page onto the front of this list.
    , pvActive :: !(IORef (VM.IOVector a))
    -- ^ Mutable page currently receiving writes.
    , pvCount :: !(IORef Int)
    -- ^ Number of slots of the active page that have been written so far.
    }
-- | Append-only builder for unboxed vectors that grows one fixed-size page
-- ('chunkSize' slots) at a time.
data PagedUnboxedVector a = PagedUnboxedVector
    { puvChunks :: !(IORef [VU.Vector a])
    -- ^ Pages that have been filled and frozen; 'appendPagedUnboxedVector'
    -- conses each newly filled page onto the front of this list.
    , puvActive :: !(IORef (VUM.IOVector a))
    -- ^ Mutable page currently receiving writes.
    , puvCount :: !(IORef Int)
    -- ^ Number of slots of the active page that have been written so far.
    }
-- | Per-column accumulator filled while CSV rows are streamed in.
--
-- Every constructor pairs a paged vector of values with a paged 'Word8'
-- vector that receives one flag per appended value. For 'BuilderInt' the
-- row processor writes flag @1@ when the field parsed and flag @0@ (with a
-- placeholder value of @0@) when it did not.
data BuilderColumn
    = BuilderInt !(PagedUnboxedVector Int) !(PagedUnboxedVector Word8)
    -- ^ Column declared as 'Int'.
    | BuilderDouble !(PagedUnboxedVector Double) !(PagedUnboxedVector Word8)
    -- ^ Column declared as 'Double'; a successfully parsed field is flagged @1@.
    | BuilderText !(PagedVector T.Text) !(PagedUnboxedVector Word8)
    -- ^ Column stored as 'T.Text'. 'initializeColumns' picks this for declared
    -- types other than 'Int'/'Double' and for undeclared columns when
    -- inference is disabled.
    | BuilderBS !(PagedVector BS.ByteString) !(PagedUnboxedVector Word8)
    -- ^ Column stored as raw bytes. 'initializeColumns' picks this for
    -- undeclared columns when inference is enabled.
    -- NOTE(review): presumably the bytes are typed later, at finalization —
    -- confirm against @finalizeBuilderColumn@.
-- | Create an empty boxed builder: one freshly allocated (uninitialised)
-- page of 'chunkSize' slots, no completed pages and a write count of zero.
newPagedVector :: IO (PagedVector a)
newPagedVector = do
    firstPage <- VM.unsafeNew chunkSize
    chunksRef <- newIORef []
    activeRef <- newIORef firstPage
    countRef <- newIORef 0
    pure (PagedVector chunksRef activeRef countRef)
-- | Create an empty unboxed builder: one freshly allocated (uninitialised)
-- page of 'chunkSize' slots, no completed pages and a write count of zero.
newPagedUnboxedVector :: (VUM.Unbox a) => IO (PagedUnboxedVector a)
newPagedUnboxedVector = do
    firstPage <- VUM.unsafeNew chunkSize
    chunksRef <- newIORef []
    activeRef <- newIORef firstPage
    countRef <- newIORef 0
    pure (PagedUnboxedVector chunksRef activeRef countRef)
-- | Append one value (forced to WHNF) to a boxed builder.
--
-- While the active page has room the value is written in place and the
-- write count is bumped. Once the page is full it is frozen and pushed onto
-- the completed-page list, a new page is allocated, and the value becomes
-- the first element of that new page.
appendPagedVector :: PagedVector a -> a -> IO ()
appendPagedVector !pv !val = do
    used <- readIORef (pvCount pv)
    page <- readIORef (pvActive pv)
    if used >= chunkSize
        then rollOver page
        else do
            VM.unsafeWrite page used val
            writeIORef (pvCount pv) $! used + 1
  where
    -- Seal the full page and start a new one that begins with 'val'.
    rollOver fullPage = do
        sealed <- V.unsafeFreeze fullPage
        modifyIORef' (pvChunks pv) (sealed :)
        fresh <- VM.unsafeNew chunkSize
        VM.unsafeWrite fresh 0 val
        writeIORef (pvActive pv) fresh
        writeIORef (pvCount pv) 1
{-# INLINE appendPagedVector #-}
-- | Append one value (forced to WHNF) to an unboxed builder.
--
-- While the active page has room the value is written in place and the
-- write count is bumped. Once the page is full it is frozen and pushed onto
-- the completed-page list, a new page is allocated, and the value becomes
-- the first element of that new page.
appendPagedUnboxedVector :: (VUM.Unbox a) => PagedUnboxedVector a -> a -> IO ()
appendPagedUnboxedVector !puv !val = do
    used <- readIORef (puvCount puv)
    page <- readIORef (puvActive puv)
    if used >= chunkSize
        then rollOver page
        else do
            VUM.unsafeWrite page used val
            writeIORef (puvCount puv) $! used + 1
  where
    -- Seal the full page and start a new one that begins with 'val'.
    rollOver fullPage = do
        sealed <- VU.unsafeFreeze fullPage
        modifyIORef' (puvChunks puv) (sealed :)
        fresh <- VUM.unsafeNew chunkSize
        VUM.unsafeWrite fresh 0 val
        writeIORef (puvActive puv) fresh
        writeIORef (puvCount puv) 1
{-# INLINE appendPagedUnboxedVector #-}
-- | Flatten a boxed builder into a single immutable vector.
--
-- Completed pages are copied in insertion order (the stored list is
-- newest-first, so it is reversed), followed by the written prefix of the
-- active page. The completed-page list is reset to empty; the active page
-- and its write count are left as they were.
freezePagedVector :: PagedVector a -> IO (V.Vector a)
freezePagedVector (PagedVector chunksRef activeRef countRef) = do
    tailLen <- readIORef countRef
    tailPage <- readIORef activeRef
    newestFirst <- readIORef chunksRef
    writeIORef chunksRef []
    let pages = reverse newestFirst
        pagesLen = sum (V.length <$> pages)
    out <- VM.unsafeNew (tailLen + pagesLen)
    -- Copy one page at the given offset and return the offset that follows it.
    let blit !at page = do
            let n = V.length page
            V.copy (VM.slice at n out) page
            pure (at + n)
    tailStart <- foldM blit 0 pages
    VM.copy (VM.slice tailStart tailLen out) (VM.slice 0 tailLen tailPage)
    V.unsafeFreeze out
-- | Flatten an unboxed builder into a single immutable vector.
--
-- Completed pages are copied in insertion order (the stored list is
-- newest-first, so it is reversed), followed by the written prefix of the
-- active page. The completed-page list is reset to empty; the active page
-- and its write count are left as they were.
freezePagedUnboxedVector ::
    (VUM.Unbox a) => PagedUnboxedVector a -> IO (VU.Vector a)
freezePagedUnboxedVector (PagedUnboxedVector chunksRef activeRef countRef) = do
    tailLen <- readIORef countRef
    tailPage <- readIORef activeRef
    newestFirst <- readIORef chunksRef
    writeIORef chunksRef []
    let pages = reverse newestFirst
        pagesLen = sum (VU.length <$> pages)
    out <- VUM.unsafeNew (tailLen + pagesLen)
    -- Copy one page at the given offset and return the offset that follows it.
    let blit !at page = do
            let n = VU.length page
            VU.copy (VUM.slice at n out) page
            pure (at + n)
    tailStart <- foldM blit 0 pages
    VUM.copy (VUM.slice tailStart tailLen out) (VUM.slice 0 tailLen tailPage)
    VU.unsafeFreeze out
-- | How column names are obtained when decoding delimited text.
data HeaderSpec
    = NoHeader
    -- ^ The input has no header: columns are named by position (@"0"@,
    -- @"1"@, ...) and the first row is kept as data.
    | UseFirstRow
    -- ^ The first row supplies the column names (decoded as lenient UTF-8
    -- and stripped of surrounding whitespace) and is not kept as data.
    | ProvideNames [T.Text]
    -- ^ Use the given names; columns beyond the supplied names fall back to
    -- their positional index. The first row is kept as data.
    deriving (Eq, Show)
-- | How column types are determined while reading.
data TypeSpec
    = InferFromSample Int
    -- ^ Infer types; the 'Int' is the value returned by
    -- 'typeInferenceSampleSize'.
    -- NOTE(review): presumably the number of rows sampled — confirm.
    | SpecifyTypes [(T.Text, SchemaType)]
    -- ^ Explicit (column name, type) pairs, exposed as a map by
    -- 'schemaTypeMap'. Columns not listed are still treated as inferred by
    -- 'initializeColumns'.
    | NoInference
    -- ^ No inference: 'initializeColumns' builds undeclared columns as text.
-- | Options controlling how delimited text is decoded into a 'DataFrame'.
data ReadOptions = ReadOptions
    { headerSpec :: HeaderSpec
    -- ^ Where the column names come from.
    , typeSpec :: TypeSpec
    -- ^ How column types are chosen.
    , safeRead :: Bool
    -- ^ NOTE(review): not consulted in the visible part of this module;
    -- presumably used when builder columns are finalized — confirm.
    , dateFormat :: String
    -- ^ NOTE(review): not consulted in the visible part of this module;
    -- presumably a time format string used for date parsing — confirm.
    , columnSeparator :: Char
    -- ^ Field delimiter handed to the CSV decoder (converted to a byte).
    , numColumns :: Maybe Int
    -- ^ Passed to 'processStream' as a countdown that is decremented once
    -- per processed row; streaming stops when it reaches zero.
    -- NOTE(review): despite the name this appears to cap rows — confirm.
    , missingIndicators :: [T.Text]
    -- ^ Forwarded to the row processor.
    -- NOTE(review): presumably texts to treat as missing values — confirm.
    }
-- | 'True' exactly when the spec is 'InferFromSample'.
shouldInferFromSample :: TypeSpec -> Bool
shouldInferFromSample spec = case spec of
    InferFromSample{} -> True
    _ -> False
-- | Column-name-to-type map for 'SpecifyTypes'; empty for every other spec.
schemaTypeMap :: TypeSpec -> M.Map T.Text SchemaType
schemaTypeMap spec = case spec of
    SpecifyTypes pairs -> M.fromList pairs
    _ -> M.empty
-- | The size carried by 'InferFromSample'; @0@ for every other spec.
typeInferenceSampleSize :: TypeSpec -> Int
typeInferenceSampleSize spec = case spec of
    InferFromSample size -> size
    _ -> 0
-- | Default options: names from the first row, inference from a sample of
-- 100, safe reads on, ISO-style dates, comma separator, no countdown limit
-- and no missing-value indicators.
defaultReadOptions :: ReadOptions
defaultReadOptions =
    ReadOptions
        { headerSpec = UseFirstRow
        , typeSpec = InferFromSample 100
        , safeRead = True
        , dateFormat = "%Y-%m-%d"
        , columnSeparator = ','
        , numColumns = Nothing
        , missingIndicators = []
        }
-- | Read a comma-separated file using 'defaultReadOptions'.
readCsv :: FilePath -> IO DataFrame
readCsv path = readSeparated defaultReadOptions path
-- | Read a delimited file with caller-supplied options; an alias for
-- 'readSeparated'.
readCsvWithOpts :: ReadOptions -> FilePath -> IO DataFrame
readCsvWithOpts opts path = readSeparated opts path
-- | Read a tab-separated file: 'defaultReadOptions' with the separator set
-- to a tab character.
readTsv :: FilePath -> IO DataFrame
readTsv = readSeparated tsvOptions
  where
    tsvOptions = defaultReadOptions{columnSeparator = '\t'}
-- | Read a delimited file from disk and decode it with 'decodeSeparated'.
--
-- The file is read as a lazy 'BL.ByteString'; a leading UTF-8 byte-order
-- mark, if present, is dropped before decoding.
readSeparated :: ReadOptions -> FilePath -> IO DataFrame
readSeparated opts !path =
    BL.readFile path >>= decodeSeparated opts . dropBom
  where
    utf8Bom :: BL.ByteString
    utf8Bom = "\xEF\xBB\xBF"
    dropBom contents = fromMaybe contents (BL.stripPrefix utf8Bom contents)
-- | Decode delimited bytes into a 'DataFrame'.
--
-- Steps:
--
-- 1. Stream-decode the input with the configured separator, treating every
--    line as a record (header handling is done here, not by the decoder).
-- 2. Take the first record and derive the column names from 'headerSpec'.
-- 3. Create one builder per column name, feed every remaining record to the
--    builders, then finalize them into columns.
--
-- Errors are raised with 'error': when the input has no records at all
-- (@"Empty CSV file"@), and when the first record or the first data record
-- cannot be parsed. An input that ends cleanly right after a 'UseFirstRow'
-- header is accepted: no rows are appended to the builders.
decodeSeparated :: ReadOptions -> BL.ByteString -> IO DataFrame
decodeSeparated !opts csvData = do
    (firstRowRaw, dataStream) <- peekStream stream
    let positionalNames = map (T.pack . show) [0 .. V.length firstRowRaw - 1]
        -- The first record is data unless it was consumed as the header.
        withFirstRow = Cons (Right firstRowRaw) dataStream
        (columnNames, rowsToProcess) = case headerSpec opts of
            NoHeader -> (positionalNames, withFirstRow)
            UseFirstRow -> (map decodeName (V.toList firstRowRaw), dataStream)
            ProvideNames ns ->
                (ns ++ drop (length ns) positionalNames, withFirstRow)
    sampleRow <- peekSample rowsToProcess
    builderCols <- initializeColumns columnNames sampleRow opts
    let !builderColsV = V.fromList builderCols
    processStream
        (missingIndicators opts)
        rowsToProcess
        builderColsV
        (numColumns opts)
    frozenCols <- V.mapM (finalizeBuilderColumn opts) builderColsV
    let numRows = maybe 0 columnLength (frozenCols V.!? 0)
    return $
        DataFrame
            frozenCols
            (M.fromList (zip columnNames [0 ..]))
            (numRows, V.length frozenCols)
            M.empty
  where
    decodeOpts =
        Csv.defaultDecodeOptions
            { Csv.decDelimiter = fromIntegral (ord (columnSeparator opts))
            }

    stream :: Records (V.Vector BL.ByteString)
    stream = CsvStream.decodeWith decodeOpts Csv.NoHeader csvData

    decodeName :: BL.ByteString -> T.Text
    decodeName = T.strip . TE.decodeUtf8Lenient . BL.toStrict

    -- Split off the next record, raising on a parse error or an empty stream.
    peekStream :: Records a -> IO (a, Records a)
    peekStream (Cons (Right row) rest) = return (row, rest)
    peekStream (Cons (Left err) _) = error $ "Error parsing CSV header: " ++ err
    peekStream (Nil Nothing _) = error "Empty CSV file"
    peekStream (Nil (Just err) _) = error err

    -- Fields of the first data record. A stream that ended cleanly has no
    -- data rows, which is valid (header-only input), so it yields no fields
    -- instead of raising; every other case behaves like 'peekStream'.
    peekSample :: Records (V.Vector BL.ByteString) -> IO [BL.ByteString]
    peekSample (Nil Nothing _) = pure []
    peekSample records = V.toList . fst <$> peekStream records
-- | Allocate one empty 'BuilderColumn' per column name.
--
-- The builder kind depends on the type declared for the name in
-- 'typeSpec' (via 'schemaTypeMap'):
--
-- * no declared type and inference enabled: raw-bytes builder ('BuilderBS');
-- * declared 'Int': 'BuilderInt';
-- * declared 'Double': 'BuilderDouble';
-- * anything else, including no declared type under 'NoInference':
--   'BuilderText'.
--
-- The sample row argument is accepted but not inspected.
initializeColumns ::
    [T.Text] -> [BL.ByteString] -> ReadOptions -> IO [BuilderColumn]
initializeColumns names _row opts = mapM (builderFor . declaredType) names
  where
    declared = schemaTypeMap (typeSpec opts)

    declaredType name = M.lookup name declared

    inferring = case typeSpec opts of
        NoInference -> False
        InferFromSample{} -> True
        SpecifyTypes{} -> True

    -- The validity vector is always allocated before the value vector.
    builderFor :: Maybe SchemaType -> IO BuilderColumn
    builderFor Nothing
        | inferring = do
            validity <- newPagedUnboxedVector
            values <- newPagedVector
            pure (BuilderBS values validity)
    builderFor declaredAs = do
        validity <- newPagedUnboxedVector
        case fromMaybe (schemaType @T.Text) declaredAs of
            SType (_ :: P.Proxy a)
                | Just Refl <- testEquality (typeRep @a) (typeRep @Int) -> do
                    values <- newPagedUnboxedVector
                    pure (BuilderInt values validity)
                | Just Refl <- testEquality (typeRep @a) (typeRep @Double) -> do
                    values <- newPagedUnboxedVector
                    pure (BuilderDouble values validity)
                | otherwise -> do
                    values <- newPagedVector
                    pure (BuilderText values validity)
-- | Feed every record of a decoded stream into the column builders.
--
-- Arguments, in order: the missing-value indicators (forwarded to
-- 'processRow'), the record stream, the builder columns, and an optional
-- countdown. With @Just n@ the countdown is decremented after each processed
-- record and processing stops once it reaches zero; with 'Nothing' the whole
-- stream is consumed.
--
-- Raises (via 'error', prefixed with @"CSV Parse Error: "@) when a record
-- fails to parse, and also when the stream ends because of a parse failure,
-- so that malformed input is reported rather than silently truncated.
processStream ::
    [T.Text] ->
    CsvStream.Records (V.Vector BL.ByteString) ->
    V.Vector BuilderColumn ->
    Maybe Int ->
    IO ()
processStream _ _ _ (Just 0) = return ()
processStream missing (Cons (Right row) rest) cols n =
    processRow missing row cols
        >> processStream missing rest cols (fmap (subtract 1) n)
processStream _ (Cons (Left err) _) _ _ = error ("CSV Parse Error: " ++ err)
-- End of stream caused by a parser failure: the remaining input was dropped.
processStream _ (Nil (Just err) _) _ _ = error ("CSV Parse Error: " ++ err)
processStream _ (Nil Nothing _) _ _ = return ()
-- | Append one parsed CSV record to the per-column builders.
--
-- Field @i@ of the record is fed to builder @i@. Every builder receives
-- exactly one entry per record: when the record has fewer fields than there
-- are builders, the trailing builders get an explicit null (placeholder value
-- plus validity flag @0@) so that all columns keep the same length. Fields
-- beyond the number of builders are ignored.
--
-- The first argument lists extra textual markers that are treated as missing
-- for text and raw-bytes builders.
processRow ::
    [T.Text] -> V.Vector BL.ByteString -> V.Vector BuilderColumn -> IO ()
processRow missing !vals !cols = V.imapM_ processField cols
  where
    -- Drive the loop from the builders rather than zipping, so a short record
    -- cannot leave some builders one row behind the others.
    processField !i !col = case vals V.!? i of
        Just bs -> processValue bs col
        Nothing -> appendNull col

    -- Record a missing cell: a placeholder value with validity flag 0.
    appendNull col = case col of
        BuilderInt gv valid ->
            appendPagedUnboxedVector gv 0 >> appendPagedUnboxedVector valid 0
        BuilderDouble gv valid ->
            appendPagedUnboxedVector gv 0.0 >> appendPagedUnboxedVector valid 0
        BuilderText gv valid ->
            appendPagedVector gv T.empty >> appendPagedUnboxedVector valid 0
        BuilderBS gv valid ->
            appendPagedVector gv BS.empty >> appendPagedUnboxedVector valid 0

    processValue !bs !col = do
        let !bs' = BL.toStrict bs
        case col of
            BuilderInt gv valid -> case readByteStringInt bs' of
                Just !i ->
                    appendPagedUnboxedVector gv i >> appendPagedUnboxedVector valid 1
                -- Unparseable cells are stored as nulls.
                Nothing -> appendNull col
            BuilderDouble gv valid -> case readByteStringDouble bs' of
                Just !d ->
                    appendPagedUnboxedVector gv d >> appendPagedUnboxedVector valid 1
                Nothing -> appendNull col
            BuilderText gv valid -> do
                let !val = T.strip (TE.decodeUtf8Lenient bs')
                if isNullish val || val `elem` missing
                    then appendNull col
                    else appendPagedVector gv val >> appendPagedUnboxedVector valid 1
            BuilderBS gv valid -> do
                let !bs'' = C.strip bs'
                if isNullishBS bs'' || TE.decodeUtf8Lenient bs'' `elem` missing
                    then appendNull col
                    else appendPagedVector gv bs'' >> appendPagedUnboxedVector valid 1
-- | Freeze an int, double or text builder into an immutable 'Column'.
--
-- The values and the validity flags are frozen first. If every flag equals
-- @1@ the values are wrapped directly; otherwise the column is built through
-- 'constructOptional' / 'constructOptionalBoxed' using the flags.
--
-- A 'BuilderBS' is rejected with 'error': it has to go through
-- 'finalizeBuilderColumn' instead.
freezeBuilderColumn :: BuilderColumn -> IO Column
freezeBuilderColumn builder = case builder of
    BuilderInt values flags -> do
        frozen <- freezePagedUnboxedVector values
        mask <- freezePagedUnboxedVector flags
        if fullyValid mask
            then return $! UnboxedColumn frozen
            else constructOptional frozen mask
    BuilderDouble values flags -> do
        frozen <- freezePagedUnboxedVector values
        mask <- freezePagedUnboxedVector flags
        if fullyValid mask
            then return $! UnboxedColumn frozen
            else constructOptional frozen mask
    BuilderText values flags -> do
        frozen <- freezePagedVector values
        mask <- freezePagedUnboxedVector flags
        if fullyValid mask
            then return $! BoxedColumn frozen
            else constructOptionalBoxed frozen mask
    BuilderBS _ _ ->
        error
            "freezeBuilderColumn: BuilderBS must be finalized via finalizeBuilderColumn"
  where
    -- True when no cell was flagged as missing.
    fullyValid :: VU.Vector Word8 -> Bool
    fullyValid = VU.all (== 1)
-- | Turn a builder into its final 'Column'.
--
-- A raw-bytes builder ('BuilderBS') is frozen and handed to
-- 'inferColumnFromBS' together with the read options; the resulting column is
-- forced to weak head normal form before being returned. Every other builder
-- is delegated to 'freezeBuilderColumn'.
finalizeBuilderColumn :: ReadOptions -> BuilderColumn -> IO Column
finalizeBuilderColumn opts builder = case builder of
    BuilderBS raw flags -> do
        cells <- freezePagedVector raw
        mask <- freezePagedUnboxedVector flags
        let !column = inferColumnFromBS opts cells mask
        return column
    other -> freezeBuilderColumn other
-- | Build a typed 'Column' from raw cell bytes and their validity flags.
--
-- Cells whose flag is not @1@ become 'Nothing'. A parsing assumption is
-- derived from a prefix of the cells (the configured sample size, or 100 when
-- that setting is 0), and the whole column is then converted by the handler
-- matching that assumption.
inferColumnFromBS ::
    ReadOptions -> V.Vector BS.ByteString -> VU.Vector Word8 -> Column
inferColumnFromBS opts vec valid =
    case makeParsingAssumptionBS fmt (V.take sampleSize cells) of
        IntAssumption -> handleBSInt fmt cells
        DoubleAssumption -> handleBSDouble cells
        BoolAssumption -> handleBSBool cells
        DateAssumption -> handleBSDate fmt cells
        TextAssumption -> handleBSText cells
        NoAssumption -> handleBSNo fmt cells
  where
    fmt :: String
    fmt = dateFormat opts

    -- A configured size of 0 falls back to sampling 100 cells.
    sampleSize :: Int
    sampleSize = case typeInferenceSampleSize (typeSpec opts) of
        0 -> 100
        k -> k

    -- Pair every cell with its flag; only flag 1 counts as present.
    cells :: V.Vector (Maybe BS.ByteString)
    cells = V.imap tag vec
      where
        tag i raw
            | valid VU.! i == 1 = Just raw
            | otherwise = Nothing
-- | Guess how a sample of optional raw cells should be parsed.
--
-- The candidates are tried in a fixed order: all-missing ('NoAssumption'),
-- bool, int (which additionally requires the double parse to agree), double,
-- date (using the supplied format string) and finally text. A candidate is
-- accepted when 'vecSameConstructor' holds between the sample and its parsed
-- form (presumably a position-wise Just/Nothing match; confirm against
-- 'vecSameConstructor').
makeParsingAssumptionBS ::
    String -> V.Vector (Maybe BS.ByteString) -> ParsingAssumption
makeParsingAssumptionBS dfmt sample
    | V.all isNothing sample = NoAssumption
    | parsesWith readByteStringBool = BoolAssumption
    | parsesWith readByteStringInt && doublesOk = IntAssumption
    | doublesOk = DoubleAssumption
    | parsesWith (readByteStringDate dfmt) = DateAssumption
    | otherwise = TextAssumption
  where
    -- Parse every present cell and compare the outcome with the sample.
    parsesWith :: (BS.ByteString -> Maybe b) -> Bool
    parsesWith parser = vecSameConstructor sample (V.map (parser =<<) sample)

    -- Shared by the int and double guards.
    doublesOk :: Bool
    doublesOk = parsesWith readByteStringDouble
-- | Convert optional raw cells to a boolean column.
--
-- When the bool parse is accepted by 'vecSameConstructor', the result is a
-- column of plain values if every cell is present and a column of optional
-- values otherwise. If the parse is not accepted the cells are handled as
-- text via 'handleBSText'.
handleBSBool :: V.Vector (Maybe BS.ByteString) -> Column
handleBSBool cells =
    if vecSameConstructor cells parsed
        then case sequenceA parsed of
            Just bools -> fromVector bools
            Nothing -> fromVector parsed
        else handleBSText cells
  where
    parsed :: V.Vector (Maybe Bool)
    parsed = V.map (readByteStringBool =<<) cells
-- | Convert optional raw cells to an integer column, degrading to double and
-- then to text.
--
-- The int result is used only when both the int and the double parse are
-- accepted by 'vecSameConstructor'; if only the double parse is accepted the
-- column becomes a double column; otherwise 'handleBSText' is used. In each
-- numeric case the column holds plain values when no cell is missing and
-- optional values otherwise. The format-string argument is not used.
handleBSInt :: String -> V.Vector (Maybe BS.ByteString) -> Column
handleBSInt _dfmt cells
    | intsOk && doublesOk = case sequenceA ints of
        Just plain -> fromVector plain
        Nothing -> fromVector ints
    | doublesOk = case sequenceA doubles of
        Just plain -> fromVector plain
        Nothing -> fromVector doubles
    | otherwise = handleBSText cells
  where
    ints :: V.Vector (Maybe Int)
    ints = V.map (readByteStringInt =<<) cells

    doubles :: V.Vector (Maybe Double)
    doubles = V.map (readByteStringDouble =<<) cells

    intsOk :: Bool
    intsOk = vecSameConstructor cells ints

    doublesOk :: Bool
    doublesOk = vecSameConstructor cells doubles
-- | Convert optional raw cells to a double column.
--
-- When the double parse is accepted by 'vecSameConstructor', the column holds
-- plain values if no cell is missing and optional values otherwise. If the
-- parse is not accepted the cells are handled as text via 'handleBSText'.
handleBSDouble :: V.Vector (Maybe BS.ByteString) -> Column
handleBSDouble cells =
    if vecSameConstructor cells parsed
        then case sequenceA parsed of
            Just plain -> fromVector plain
            Nothing -> fromVector parsed
        else handleBSText cells
  where
    parsed :: V.Vector (Maybe Double)
    parsed = V.map (readByteStringDouble =<<) cells
-- | Convert optional raw cells to a date column using the given format string.
--
-- When the date parse is accepted by 'vecSameConstructor', the column holds
-- plain values if no cell is missing and optional values otherwise. If the
-- parse is not accepted the cells are handled as text via 'handleBSText'.
handleBSDate :: String -> V.Vector (Maybe BS.ByteString) -> Column
handleBSDate dfmt cells =
    if vecSameConstructor cells parsed
        then case sequenceA parsed of
            Just plain -> fromVector plain
            Nothing -> fromVector parsed
        else handleBSText cells
  where
    parsed = V.map (readByteStringDate dfmt =<<) cells
-- | Convert optional raw cells to a text column.
--
-- Present cells are decoded with 'TE.decodeUtf8Lenient'. The column holds
-- plain text when every cell is present and optional text otherwise.
handleBSText :: V.Vector (Maybe BS.ByteString) -> Column
handleBSText cells = case sequenceA decoded of
    Just plain -> fromVector plain
    Nothing -> fromVector decoded
  where
    decoded :: V.Vector (Maybe T.Text)
    decoded = fmap (fmap TE.decodeUtf8Lenient) cells
-- | Convert optional raw cells when sampling produced no assumption.
--
-- If every cell is missing, the result is a column of optional text holding
-- only 'Nothing'. Otherwise the parsers are tried over the whole column in
-- the order bool, int (also requiring the double parse to agree), double and
-- date (using the given format string); the first one accepted by
-- 'vecSameConstructor' determines the column type, with plain values when no
-- cell is missing and optional values otherwise. If none is accepted the
-- cells are handled as text via 'handleBSText'.
handleBSNo :: String -> V.Vector (Maybe BS.ByteString) -> Column
handleBSNo dfmt cells
    | V.all isNothing cells =
        fromVector (V.replicate (V.length cells) (Nothing :: Maybe T.Text))
    | boolsOk = case sequenceA bools of
        Just plain -> fromVector plain
        Nothing -> fromVector bools
    | intsOk && doublesOk = case sequenceA ints of
        Just plain -> fromVector plain
        Nothing -> fromVector ints
    | doublesOk = case sequenceA doubles of
        Just plain -> fromVector plain
        Nothing -> fromVector doubles
    | datesOk = case sequenceA dates of
        Just plain -> fromVector plain
        Nothing -> fromVector dates
    | otherwise = handleBSText cells
  where
    bools :: V.Vector (Maybe Bool)
    bools = V.map (readByteStringBool =<<) cells

    ints :: V.Vector (Maybe Int)
    ints = V.map (readByteStringInt =<<) cells

    doubles :: V.Vector (Maybe Double)
    doubles = V.map (readByteStringDouble =<<) cells

    dates = V.map (readByteStringDate dfmt =<<) cells

    boolsOk, intsOk, doublesOk, datesOk :: Bool
    boolsOk = vecSameConstructor cells bools
    intsOk = vecSameConstructor cells ints
    doublesOk = vecSameConstructor cells doubles
    datesOk = vecSameConstructor cells dates
-- | Build an optional column from unboxed values and their validity flags.
--
-- Position @i@ becomes 'Nothing' when its flag is @0@ and @Just@ the value at
-- @i@ otherwise. The flag vector is indexed with the positions of the value
-- vector, so it is assumed to be at least as long (not checked here).
constructOptional ::
    (VU.Unbox a, Columnable a) => VU.Vector a -> VU.Vector Word8 -> IO Column
constructOptional vec valid = do
    cells <- VM.new total
    -- Fill the freshly allocated buffer front to back.
    let fill !i
            | i >= total = return ()
            | valid VU.! i == 0 = VM.write cells i Nothing >> fill (i + 1)
            | otherwise = VM.write cells i (Just (vec VU.! i)) >> fill (i + 1)
    fill 0
    frozen <- V.freeze cells
    return (OptionalColumn frozen)
  where
    total :: Int
    total = VU.length vec
-- | Build an 'OptionalColumn' of 'T.Text' from a boxed vector of values and
-- a validity mask.
--
-- Slot @i@ of the result is 'Nothing' when @valid VU.! i@ equals @0@ and
-- @Just (vec V.! i)@ otherwise. The result has as many slots as @vec@.
constructOptionalBoxed :: V.Vector T.Text -> VU.Vector Word8 -> IO Column
constructOptionalBoxed vec valid =
    OptionalColumn <$> V.generateM (V.length vec) slot
  where
    -- The guard is evaluated while the action for slot @i@ is being
    -- sequenced, so the mask lookup happens inside the IO computation.
    slot i
        | valid VU.! i == 0 = pure Nothing
        | otherwise = pure (Just (vec V.! i))
-- | Write a 'DataFrame' to the given path by delegating to 'writeSeparated'
-- with @','@ as the separator argument.
writeCsv :: FilePath -> DataFrame -> IO ()
writeCsv path frame = writeSeparated ',' path frame
-- | Write a 'DataFrame' to the given path by delegating to 'writeSeparated'
-- with a tab character (@'\t'@) as the separator argument.
writeTsv :: FilePath -> DataFrame -> IO ()
writeTsv path frame = writeSeparated '\t' path frame
-- | Write a 'DataFrame' to @filepath@ as delimiter-separated text.
--
-- The file is opened in 'WriteMode'. The first line holds the column names,
-- ordered by the position recorded for each name in 'columnIndices'. Every
-- following line is one row as rendered by 'getRowAsText'. Fields on a line
-- are joined with the separator character @c@.
--
-- This function applies no quoting or escaping of its own: a field that
-- itself contains the separator or a newline is written verbatim.
writeSeparated ::
    -- | Separator placed between fields
    Char ->
    -- | Path to write to
    FilePath ->
    DataFrame ->
    IO ()
writeSeparated c filepath df = withFile filepath WriteMode $ \handle -> do
    let (rows, _) = dataframeDimensions df
        headers = map fst (L.sortBy (compare `on` snd) (M.toList (columnIndices df)))
        -- Join with the caller-supplied separator; previously a literal ","
        -- was used here, so every separator (including tab) produced commas.
        joinFields = T.intercalate (T.singleton c)
    TIO.hPutStrLn handle (joinFields headers)
    forM_ [0 .. rows - 1] $ \i ->
        TIO.hPutStrLn handle (joinFields (getRowAsText df i))
-- | Render row @i@ of a 'DataFrame' as a list of 'T.Text' cells, one per
-- column, in the order of the @columns@ vector.
--
-- Rendering rules, per column constructor:
--
-- * 'BoxedColumn': a 'T.Text' element is emitted as is. A @Maybe Text@
--   element is unwrapped, with @"null"@ for 'Nothing'. Any other
--   @Maybe@ element is shown, then a leading @"Just "@ is dropped;
--   a shown value without that prefix becomes @"null"@. Every other
--   element type goes through 'show'.
-- * 'UnboxedColumn': the element goes through 'show'.
-- * 'OptionalColumn': 'Nothing' becomes the empty text; @Just x@ is emitted
--   as is for 'T.Text' and through 'show' otherwise.
--
-- Calls 'error' when a column has no element at index @i@.
getRowAsText :: DataFrame -> Int -> [T.Text]
getRowAsText df i = V.ifoldr render [] (columns df)
  where
    -- Reverse lookup (column position -> column name); only consulted when
    -- building the error message.
    namesByIndex = M.fromList [(pos, name) | (name, pos) <- M.toList (columnIndices df)]

    -- Shared failure for a column that has no element at row @i@.
    tooShort :: Int -> b
    tooShort k =
        error $
            "Column "
                ++ T.unpack (namesByIndex M.! k)
                ++ " has less items than "
                ++ "the other columns at index "
                ++ show i

    viaShow :: (Show b) => b -> T.Text
    viaShow = T.pack . show

    -- Turn the shown form of a @Maybe@ value into its payload text.
    unwrapShown :: T.Text -> T.Text
    unwrapShown shown
        | "Just " `T.isPrefixOf` shown = T.drop (T.length "Just ") shown
        | otherwise = "null"

    render :: Int -> Column -> [T.Text] -> [T.Text]
    render k (BoxedColumn (c :: V.Vector a)) acc = case c V.!? i of
        Nothing -> tooShort k
        Just e ->
            let cell :: T.Text
                cell
                    -- Element type is exactly Text.
                    | Just Refl <- testEquality (typeRep @a) (typeRep @T.Text) = e
                    -- Element type is @Maybe something@.
                    | App ctor arg <- typeRep @a
                    , Just HRefl <- eqTypeRep ctor (typeRep @Maybe) =
                        case testEquality arg (typeRep @T.Text) of
                            Just Refl -> fromMaybe "null" e
                            Nothing -> unwrapShown (viaShow e)
                    | otherwise = viaShow e
             in cell : acc
    render k (UnboxedColumn c) acc =
        maybe (tooShort k) (\e -> viaShow e : acc) (c VU.!? i)
    render k (OptionalColumn (c :: V.Vector (Maybe a))) acc = case c V.!? i of
        Nothing -> tooShort k
        Just e
            | Just Refl <- testEquality (typeRep @a) (typeRep @T.Text) ->
                fromMaybe T.empty e : acc
            | otherwise ->
                maybe T.empty viaShow e : acc
-- | Remove one pair of enclosing double quotes from a text value.
--
-- The input is returned unchanged unless it both starts with @"@ and, once
-- that leading quote is removed, still ends with @"@. A lone @"@ is
-- therefore left as is, while @""@ becomes the empty text.
stripQuotes :: T.Text -> T.Text
stripQuotes txt =
    fromMaybe txt (T.stripPrefix quote txt >>= T.stripSuffix quote)
  where
    quote = T.singleton '"'