{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.IO.CSV where
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Char8 as C8
import qualified Data.List as L
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.IO as TIO
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import Control.Applicative (many, (<|>))
import Control.Monad (forM_, unless, zipWithM_)
import Data.Attoparsec.ByteString.Char8 hiding (endOfLine)
import Data.Bits (shiftL)
import Data.Char
import Data.Either
import Data.Function (on)
import Data.Functor
import Data.IORef
import Data.Maybe
import Data.Type.Equality (TestEquality (testEquality))
import DataFrame.Internal.Column (Column (..), columnLength)
import DataFrame.Internal.DataFrame (DataFrame (..))
import DataFrame.Internal.Parsing
import DataFrame.Operations.Typing
import System.IO
import Type.Reflection
import Prelude hiding (concat, takeWhile)
data GrowingVector a = GrowingVector
{ forall a. GrowingVector a -> IORef (IOVector a)
gvData :: !(IORef (VM.IOVector a))
, forall a. GrowingVector a -> IORef Int
gvSize :: !(IORef Int)
, forall a. GrowingVector a -> IORef Int
gvCapacity :: !(IORef Int)
}
data GrowingUnboxedVector a = GrowingUnboxedVector
{ forall a. GrowingUnboxedVector a -> IORef (IOVector a)
guvData :: !(IORef (VUM.IOVector a))
, forall a. GrowingUnboxedVector a -> IORef Int
guvSize :: !(IORef Int)
, forall a. GrowingUnboxedVector a -> IORef Int
guvCapacity :: !(IORef Int)
}
data GrowingColumn
= GrowingInt !(GrowingUnboxedVector Int) !(IORef [Int])
| GrowingDouble !(GrowingUnboxedVector Double) !(IORef [Int])
| GrowingText !(GrowingVector T.Text) !(IORef [Int])
data ReadOptions = ReadOptions
{ :: Bool
, ReadOptions -> Bool
inferTypes :: Bool
, ReadOptions -> Bool
safeRead :: Bool
, ReadOptions -> Int
chunkSize :: Int
, ReadOptions -> String
dateFormat :: String
}
defaultReadOptions :: ReadOptions
defaultReadOptions :: ReadOptions
defaultReadOptions =
ReadOptions
{ hasHeader :: Bool
hasHeader = Bool
True
, inferTypes :: Bool
inferTypes = Bool
True
, safeRead :: Bool
safeRead = Bool
True
, chunkSize :: Int
chunkSize = Int
512_000
, dateFormat :: String
dateFormat = String
"%Y-%m-%d"
}
newGrowingVector :: Int -> IO (GrowingVector a)
newGrowingVector :: forall a. Int -> IO (GrowingVector a)
newGrowingVector !Int
initCap = do
IOVector a
vec <- Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.unsafeNew Int
initCap
IORef (IOVector a) -> IORef Int -> IORef Int -> GrowingVector a
forall a.
IORef (IOVector a) -> IORef Int -> IORef Int -> GrowingVector a
GrowingVector (IORef (IOVector a) -> IORef Int -> IORef Int -> GrowingVector a)
-> IO (IORef (IOVector a))
-> IO (IORef Int -> IORef Int -> GrowingVector a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IOVector a -> IO (IORef (IOVector a))
forall a. a -> IO (IORef a)
newIORef IOVector a
vec IO (IORef Int -> IORef Int -> GrowingVector a)
-> IO (IORef Int) -> IO (IORef Int -> GrowingVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
0 IO (IORef Int -> GrowingVector a)
-> IO (IORef Int) -> IO (GrowingVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
initCap
newGrowingUnboxedVector :: (VUM.Unbox a) => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector :: forall a. Unbox a => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector !Int
initCap = do
IOVector a
vec <- Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
VUM.unsafeNew Int
initCap
IORef (IOVector a)
-> IORef Int -> IORef Int -> GrowingUnboxedVector a
forall a.
IORef (IOVector a)
-> IORef Int -> IORef Int -> GrowingUnboxedVector a
GrowingUnboxedVector (IORef (IOVector a)
-> IORef Int -> IORef Int -> GrowingUnboxedVector a)
-> IO (IORef (IOVector a))
-> IO (IORef Int -> IORef Int -> GrowingUnboxedVector a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IOVector a -> IO (IORef (IOVector a))
forall a. a -> IO (IORef a)
newIORef IOVector a
vec IO (IORef Int -> IORef Int -> GrowingUnboxedVector a)
-> IO (IORef Int) -> IO (IORef Int -> GrowingUnboxedVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
0 IO (IORef Int -> GrowingUnboxedVector a)
-> IO (IORef Int) -> IO (GrowingUnboxedVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
initCap
appendGrowingVector :: GrowingVector a -> a -> IO ()
appendGrowingVector :: forall a. GrowingVector a -> a -> IO ()
appendGrowingVector (GrowingVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
capRef) !a
val = do
Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
Int
cap <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
capRef
IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef
IOVector a
vec' <-
if Int
size Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
cap
then do
let !newCap :: Int
newCap = Int
cap Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1
IOVector a
newVec <- MVector (PrimState IO) a -> Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
VM.unsafeGrow IOVector a
MVector (PrimState IO) a
vec Int
newCap
IORef (IOVector a) -> IOVector a -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef (IOVector a)
vecRef IOVector a
newVec
IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
capRef Int
newCap
IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
newVec
else IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
vec
MVector (PrimState IO) a -> Int -> a -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.unsafeWrite IOVector a
MVector (PrimState IO) a
vec' Int
size a
val
IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
sizeRef (Int -> IO ()) -> Int -> IO ()
forall a b. (a -> b) -> a -> b
$! Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
appendGrowingUnboxedVector ::
(VUM.Unbox a) => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector :: forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector (GrowingUnboxedVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
capRef) !a
val = do
Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
Int
cap <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
capRef
IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef
IOVector a
vec' <-
if Int
size Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
cap
then do
let !newCap :: Int
newCap = Int
cap Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1
IOVector a
newVec <- MVector (PrimState IO) a -> Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
VUM.unsafeGrow IOVector a
MVector (PrimState IO) a
vec Int
newCap
IORef (IOVector a) -> IOVector a -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef (IOVector a)
vecRef IOVector a
newVec
IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
capRef Int
newCap
IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
newVec
else IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
vec
MVector (PrimState IO) a -> Int -> a -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite IOVector a
MVector (PrimState IO) a
vec' Int
size a
val
IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
sizeRef (Int -> IO ()) -> Int -> IO ()
forall a b. (a -> b) -> a -> b
$! Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
freezeGrowingVector :: GrowingVector a -> IO (V.Vector a)
freezeGrowingVector :: forall a. GrowingVector a -> IO (Vector a)
freezeGrowingVector (GrowingVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
_) = do
IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef
Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
MVector (PrimState IO) a -> IO (Vector a)
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze (Int -> Int -> IOVector a -> IOVector a
forall s a. Int -> Int -> MVector s a -> MVector s a
VM.slice Int
0 Int
size IOVector a
vec)
freezeGrowingUnboxedVector ::
(VUM.Unbox a) => GrowingUnboxedVector a -> IO (VU.Vector a)
freezeGrowingUnboxedVector :: forall a. Unbox a => GrowingUnboxedVector a -> IO (Vector a)
freezeGrowingUnboxedVector (GrowingUnboxedVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
_) = do
IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef
Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
MVector (PrimState IO) a -> IO (Vector a)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.freeze (Int -> Int -> IOVector a -> IOVector a
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
VUM.slice Int
0 Int
size IOVector a
vec)
readCsv :: FilePath -> IO DataFrame
readCsv :: String -> IO DataFrame
readCsv = Char -> ReadOptions -> String -> IO DataFrame
readSeparated Char
',' ReadOptions
defaultReadOptions
readCsvWithOpts :: ReadOptions -> FilePath -> IO DataFrame
readCsvWithOpts :: ReadOptions -> String -> IO DataFrame
readCsvWithOpts ReadOptions
opts = Char -> ReadOptions -> String -> IO DataFrame
readSeparated Char
',' ReadOptions
opts
readTsv :: FilePath -> IO DataFrame
readTsv :: String -> IO DataFrame
readTsv = Char -> ReadOptions -> String -> IO DataFrame
readSeparated Char
'\t' ReadOptions
defaultReadOptions
readSeparated :: Char -> ReadOptions -> FilePath -> IO DataFrame
readSeparated :: Char -> ReadOptions -> String -> IO DataFrame
readSeparated !Char
sep !ReadOptions
opts !String
path = String -> IOMode -> (Handle -> IO DataFrame) -> IO DataFrame
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
path IOMode
ReadMode ((Handle -> IO DataFrame) -> IO DataFrame)
-> (Handle -> IO DataFrame) -> IO DataFrame
forall a b. (a -> b) -> a -> b
$ \Handle
handle -> do
Handle -> BufferMode -> IO ()
hSetBuffering Handle
handle (Maybe Int -> BufferMode
BlockBuffering (Int -> Maybe Int
forall a. a -> Maybe a
Just (ReadOptions -> Int
chunkSize ReadOptions
opts)))
ByteString
firstLine <- Handle -> IO ByteString
C8.hGetLine Handle
handle
let firstRow :: [ByteString]
firstRow = Char -> ByteString -> [ByteString]
parseLine Char
sep ByteString
firstLine
columnNames :: [Text]
columnNames =
if ReadOptions -> Bool
hasHeader ReadOptions
opts
then (ByteString -> Text) -> [ByteString] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\"') (Text -> Text) -> (ByteString -> Text) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TE.decodeUtf8Lenient) [ByteString]
firstRow
else (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Char -> Text
T.singleton (Char -> Text) -> (Int -> Char) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
intToDigit) [Int
0 .. [ByteString] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
firstRow Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ReadOptions -> Bool
hasHeader ReadOptions
opts) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
handle SeekMode
AbsoluteSeek Integer
0
ByteString
dataLine <- Handle -> IO ByteString
C8.hGetLine Handle
handle
let dataRow :: [ByteString]
dataRow = Char -> ByteString -> [ByteString]
parseLine Char
sep ByteString
dataLine
[GrowingColumn]
growingCols <- [ByteString] -> ReadOptions -> IO [GrowingColumn]
initializeColumns [ByteString]
dataRow ReadOptions
opts
Int -> [ByteString] -> [GrowingColumn] -> IO ()
processRow Int
0 [ByteString]
dataRow [GrowingColumn]
growingCols
Handle -> Char -> [GrowingColumn] -> Int -> Int -> IO ()
processFile Handle
handle Char
sep [GrowingColumn]
growingCols (ReadOptions -> Int
chunkSize ReadOptions
opts) Int
1
Vector Column
frozenCols <- [Column] -> Vector Column
forall a. [a] -> Vector a
V.fromList ([Column] -> Vector Column) -> IO [Column] -> IO (Vector Column)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (GrowingColumn -> IO Column) -> [GrowingColumn] -> IO [Column]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM GrowingColumn -> IO Column
freezeGrowingColumn [GrowingColumn]
growingCols
let numRows :: Int
numRows = Int -> (Column -> Int) -> Maybe Column -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int
0 Column -> Int
columnLength (Vector Column
frozenCols Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? Int
0)
df :: DataFrame
df =
DataFrame
{ columns :: Vector Column
columns = Vector Column
frozenCols
, columnIndices :: Map Text Int
columnIndices = [(Text, Int)] -> Map Text Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([Text] -> [Int] -> [(Text, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
columnNames [Int
0 ..])
, dataframeDimensions :: (Int, Int)
dataframeDimensions = (Int
numRows, Vector Column -> Int
forall a. Vector a -> Int
V.length Vector Column
frozenCols)
}
DataFrame -> IO DataFrame
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (DataFrame -> IO DataFrame) -> DataFrame -> IO DataFrame
forall a b. (a -> b) -> a -> b
$
if ReadOptions -> Bool
inferTypes ReadOptions
opts
then Bool -> String -> DataFrame -> DataFrame
parseDefaults (ReadOptions -> Bool
safeRead ReadOptions
opts) (ReadOptions -> String
dateFormat ReadOptions
opts) DataFrame
df
else DataFrame
df
initializeColumns :: [BS.ByteString] -> ReadOptions -> IO [GrowingColumn]
initializeColumns :: [ByteString] -> ReadOptions -> IO [GrowingColumn]
initializeColumns [ByteString]
row ReadOptions
opts = (ByteString -> IO GrowingColumn)
-> [ByteString] -> IO [GrowingColumn]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM ByteString -> IO GrowingColumn
initColumn [ByteString]
row
where
initColumn :: BS.ByteString -> IO GrowingColumn
initColumn :: ByteString -> IO GrowingColumn
initColumn ByteString
bs = do
IORef [Int]
nullsRef <- [Int] -> IO (IORef [Int])
forall a. a -> IO (IORef a)
newIORef []
let val :: Text
val = ByteString -> Text
TE.decodeUtf8Lenient ByteString
bs
if ReadOptions -> Bool
inferTypes ReadOptions
opts
then case Text -> InferredType
inferType Text
val of
InferredType
IntType -> GrowingUnboxedVector Int -> IORef [Int] -> GrowingColumn
GrowingInt (GrowingUnboxedVector Int -> IORef [Int] -> GrowingColumn)
-> IO (GrowingUnboxedVector Int)
-> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingUnboxedVector Int)
forall a. Unbox a => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef
InferredType
DoubleType -> GrowingUnboxedVector Double -> IORef [Int] -> GrowingColumn
GrowingDouble (GrowingUnboxedVector Double -> IORef [Int] -> GrowingColumn)
-> IO (GrowingUnboxedVector Double)
-> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingUnboxedVector Double)
forall a. Unbox a => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef
InferredType
TextType -> GrowingVector Text -> IORef [Int] -> GrowingColumn
GrowingText (GrowingVector Text -> IORef [Int] -> GrowingColumn)
-> IO (GrowingVector Text) -> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingVector Text)
forall a. Int -> IO (GrowingVector a)
newGrowingVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef
else GrowingVector Text -> IORef [Int] -> GrowingColumn
GrowingText (GrowingVector Text -> IORef [Int] -> GrowingColumn)
-> IO (GrowingVector Text) -> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingVector Text)
forall a. Int -> IO (GrowingVector a)
newGrowingVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef
data InferredType = IntType | DoubleType | TextType
inferType :: T.Text -> InferredType
inferType :: Text -> InferredType
inferType !Text
t
| Text -> Bool
T.null Text
t = InferredType
TextType
| Maybe Int -> Bool
forall a. Maybe a -> Bool
isJust (HasCallStack => Text -> Maybe Int
Text -> Maybe Int
readInt Text
t) = InferredType
IntType
| Maybe Double -> Bool
forall a. Maybe a -> Bool
isJust (HasCallStack => Text -> Maybe Double
Text -> Maybe Double
readDouble Text
t) = InferredType
DoubleType
| Bool
otherwise = InferredType
TextType
processRow :: Int -> [BS.ByteString] -> [GrowingColumn] -> IO ()
processRow :: Int -> [ByteString] -> [GrowingColumn] -> IO ()
processRow !Int
rowIdx ![ByteString]
vals ![GrowingColumn]
cols = (ByteString -> GrowingColumn -> IO ())
-> [ByteString] -> [GrowingColumn] -> IO ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (Int -> ByteString -> GrowingColumn -> IO ()
processValue Int
rowIdx) [ByteString]
vals [GrowingColumn]
cols
where
processValue :: Int -> BS.ByteString -> GrowingColumn -> IO ()
processValue :: Int -> ByteString -> GrowingColumn -> IO ()
processValue !Int
idx !ByteString
bs !GrowingColumn
col = do
let !val :: Text
val = ByteString -> Text
TE.decodeUtf8Lenient ByteString
bs
case GrowingColumn
col of
GrowingInt GrowingUnboxedVector Int
gv IORef [Int]
nulls ->
case HasCallStack => ByteString -> Maybe Int
ByteString -> Maybe Int
readByteStringInt ByteString
bs of
Just !Int
i -> GrowingUnboxedVector Int -> Int -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Int
gv Int
i
Maybe Int
Nothing -> do
GrowingUnboxedVector Int -> Int -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Int
gv Int
0
IORef [Int] -> ([Int] -> [Int]) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [Int]
nulls (Int
idx Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
GrowingDouble GrowingUnboxedVector Double
gv IORef [Int]
nulls ->
case HasCallStack => ByteString -> Maybe Double
ByteString -> Maybe Double
readByteStringDouble ByteString
bs of
Just !Double
d -> GrowingUnboxedVector Double -> Double -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Double
gv Double
d
Maybe Double
Nothing -> do
GrowingUnboxedVector Double -> Double -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Double
gv Double
0.0
IORef [Int] -> ([Int] -> [Int]) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [Int]
nulls (Int
idx Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
GrowingText GrowingVector Text
gv IORef [Int]
nulls ->
if Text -> Bool
isNull Text
val
then do
GrowingVector Text -> Text -> IO ()
forall a. GrowingVector a -> a -> IO ()
appendGrowingVector GrowingVector Text
gv Text
T.empty
IORef [Int] -> ([Int] -> [Int]) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [Int]
nulls (Int
idx Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
else GrowingVector Text -> Text -> IO ()
forall a. GrowingVector a -> a -> IO ()
appendGrowingVector GrowingVector Text
gv Text
val
isNull :: T.Text -> Bool
isNull :: Text -> Bool
isNull Text
t = Text -> Bool
T.null Text
t Bool -> Bool -> Bool
|| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"NA" Bool -> Bool -> Bool
|| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"NULL" Bool -> Bool -> Bool
|| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"null"
processFile :: Handle -> Char -> [GrowingColumn] -> Int -> Int -> IO ()
processFile :: Handle -> Char -> [GrowingColumn] -> Int -> Int -> IO ()
processFile !Handle
handle !Char
sep ![GrowingColumn]
cols !Int
chunk Int
r = do
let go :: ByteString -> Int -> IO ()
go ByteString
remain !Int
rowIdx =
IO ByteString
-> Parser [ByteString] -> ByteString -> IO (Result [ByteString])
forall (m :: * -> *) a.
Monad m =>
m ByteString -> Parser a -> ByteString -> m (Result a)
parseWith (Handle -> Int -> IO ByteString
C8.hGetNonBlocking Handle
handle Int
chunk) (Char -> Parser [ByteString]
parseRow Char
sep) ByteString
remain IO (Result [ByteString]) -> (Result [ByteString] -> IO ()) -> IO ()
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Fail ByteString
unconsumed [String]
ctx String
er -> do
Integer
erpos <- Handle -> IO Integer
hTell Handle
handle
String -> IO ()
forall a. String -> IO a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
String
"Failed to parse CSV file around "
String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Integer -> String
forall a. Show a => a -> String
show Integer
erpos
String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" byte; due: "
String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String -> String
forall a. Show a => a -> String
show String
er
String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"; context: "
String -> String -> String
forall a. Semigroup a => a -> a -> a
<> [String] -> String
forall a. Show a => a -> String
show [String]
ctx
Partial ByteString -> Result [ByteString]
c -> do
String -> IO ()
forall a. String -> IO a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Partial handler is called"
Done (ByteString
unconsumed :: C8.ByteString) ([ByteString]
row :: [C8.ByteString]) -> do
Int -> [ByteString] -> [GrowingColumn] -> IO ()
processRow Int
rowIdx [ByteString]
row [GrowingColumn]
cols
Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([ByteString] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
row Bool -> Bool -> Bool
|| ByteString
unconsumed ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
forall a. Monoid a => a
mempty) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> IO ()
go ByteString
unconsumed (Int -> IO ()) -> Int -> IO ()
forall a b. (a -> b) -> a -> b
$! Int
rowIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
ByteString -> Int -> IO ()
go ByteString
"" Int
r
parseLine :: Char -> BS.ByteString -> [BS.ByteString]
parseLine :: Char -> ByteString -> [ByteString]
parseLine !Char
sep = [ByteString] -> Either String [ByteString] -> [ByteString]
forall b a. b -> Either a b -> b
fromRight [] (Either String [ByteString] -> [ByteString])
-> (ByteString -> Either String [ByteString])
-> ByteString
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser [ByteString] -> ByteString -> Either String [ByteString]
forall a. Parser a -> ByteString -> Either String a
parseOnly (Char -> Parser [ByteString]
record Char
sep)
parseRow :: Char -> Parser [C8.ByteString]
parseRow :: Char -> Parser [ByteString]
parseRow Char
sep = Char -> Parser [ByteString]
record Char
sep Parser [ByteString] -> Parser ByteString () -> Parser [ByteString]
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser ByteString ()
endOfLine Parser ByteString ()
-> Parser ByteString () -> Parser ByteString ()
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput) Parser [ByteString] -> String -> Parser [ByteString]
forall i a. Parser i a -> String -> Parser i a
<?> String
"CSV row"
record :: Char -> Parser [BS.ByteString]
record :: Char -> Parser [ByteString]
record Char
sep = Char -> Parser ByteString
field Char
sep Parser ByteString -> Parser ByteString Char -> Parser [ByteString]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy` Char -> Parser ByteString Char
char Char
sep Parser [ByteString] -> String -> Parser [ByteString]
forall i a. Parser i a -> String -> Parser i a
<?> String
"CSV record"
field :: Char -> Parser BS.ByteString
field :: Char -> Parser ByteString
field Char
sep = Parser ByteString
quotedField Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser ByteString
unquotedField Char
sep Parser ByteString -> String -> Parser ByteString
forall i a. Parser i a -> String -> Parser i a
<?> String
"CSV field"
unquotedField :: Char -> Parser BS.ByteString
unquotedField :: Char -> Parser ByteString
unquotedField Char
sep = (Char -> Bool) -> Parser ByteString
takeWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
sep Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r') Parser ByteString -> String -> Parser ByteString
forall i a. Parser i a -> String -> Parser i a
<?> String
"unquoted field"
quotedField :: Parser BS.ByteString
quotedField :: Parser ByteString
quotedField =
do
Char -> Parser ByteString Char
char Char
'"'
LazyByteString
contents <- Builder -> LazyByteString
Builder.toLazyByteString (Builder -> LazyByteString)
-> Parser ByteString Builder -> Parser ByteString LazyByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Builder
parseQuotedContents
Char -> Parser ByteString Char
char Char
'"'
ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Parser ByteString)
-> ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ LazyByteString -> ByteString
BS.toStrict LazyByteString
contents
Parser ByteString -> String -> Parser ByteString
forall i a. Parser i a -> String -> Parser i a
<?> String
"quoted field"
where
parseQuotedContents :: Parser ByteString Builder
parseQuotedContents = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> Parser ByteString [Builder] -> Parser ByteString Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Builder -> Parser ByteString [Builder]
forall a. Parser ByteString a -> Parser ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser ByteString Builder
quotedChar
quotedChar :: Parser ByteString Builder
quotedChar =
ByteString -> Builder
Builder.byteString (ByteString -> Builder)
-> Parser ByteString -> Parser ByteString Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"')
Parser ByteString Builder
-> Parser ByteString Builder -> Parser ByteString Builder
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((Char -> Parser ByteString Char
char Char
'"' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
char Char
'"') Parser ByteString Char -> Builder -> Parser ByteString Builder
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
Data.Functor.$> Char -> Builder
Builder.char8 Char
'"')
Parser ByteString Builder -> String -> Parser ByteString Builder
forall i a. Parser i a -> String -> Parser i a
<?> String
"quoted field content"
endOfLine :: Parser ()
endOfLine :: Parser ByteString ()
endOfLine =
(Parser ByteString -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ByteString -> Parser ByteString
string ByteString
"\r\n") Parser ByteString ()
-> Parser ByteString () -> Parser ByteString ()
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser ByteString Char
char Char
'\n') Parser ByteString ()
-> Parser ByteString () -> Parser ByteString ()
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser ByteString Char
char Char
'\r'))
Parser ByteString () -> String -> Parser ByteString ()
forall i a. Parser i a -> String -> Parser i a
<?> String
"line ending"
freezeGrowingColumn :: GrowingColumn -> IO Column
freezeGrowingColumn :: GrowingColumn -> IO Column
freezeGrowingColumn (GrowingInt GrowingUnboxedVector Int
gv IORef [Int]
nullsRef) = do
Vector Int
vec <- GrowingUnboxedVector Int -> IO (Vector Int)
forall a. Unbox a => GrowingUnboxedVector a -> IO (Vector a)
freezeGrowingUnboxedVector GrowingUnboxedVector Int
gv
[Int]
nulls <- IORef [Int] -> IO [Int]
forall a. IORef a -> IO a
readIORef IORef [Int]
nullsRef
if [Int] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
nulls
then Column -> IO Column
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Column -> IO Column) -> Column -> IO Column
forall a b. (a -> b) -> a -> b
$ Vector Int -> Column
forall a. (Columnable a, Unbox a) => Vector a -> Column
UnboxedColumn Vector Int
vec
else do
let size :: Int
size = Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Int
vec
MVector RealWorld (Maybe Int)
mvec <- Int -> IO (MVector (PrimState IO) (Maybe Int))
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.new Int
size
[Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
if Int
i Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
nulls
then MVector (PrimState IO) (Maybe Int) -> Int -> Maybe Int -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Int)
MVector (PrimState IO) (Maybe Int)
mvec Int
i Maybe Int
forall a. Maybe a
Nothing
else MVector (PrimState IO) (Maybe Int) -> Int -> Maybe Int -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Int)
MVector (PrimState IO) (Maybe Int)
mvec Int
i (Int -> Maybe Int
forall a. a -> Maybe a
Just (Vector Int
vec Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i))
Vector (Maybe Int) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn (Vector (Maybe Int) -> Column)
-> IO (Vector (Maybe Int)) -> IO Column
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) (Maybe Int) -> IO (Vector (Maybe Int))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze MVector RealWorld (Maybe Int)
MVector (PrimState IO) (Maybe Int)
mvec
freezeGrowingColumn (GrowingDouble GrowingUnboxedVector Double
gv IORef [Int]
nullsRef) = do
Vector Double
vec <- GrowingUnboxedVector Double -> IO (Vector Double)
forall a. Unbox a => GrowingUnboxedVector a -> IO (Vector a)
freezeGrowingUnboxedVector GrowingUnboxedVector Double
gv
[Int]
nulls <- IORef [Int] -> IO [Int]
forall a. IORef a -> IO a
readIORef IORef [Int]
nullsRef
if [Int] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
nulls
then Column -> IO Column
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Column -> IO Column) -> Column -> IO Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. (Columnable a, Unbox a) => Vector a -> Column
UnboxedColumn Vector Double
vec
else do
let size :: Int
size = Vector Double -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Double
vec
MVector RealWorld (Maybe Double)
mvec <- Int -> IO (MVector (PrimState IO) (Maybe Double))
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.new Int
size
[Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
if Int
i Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
nulls
then MVector (PrimState IO) (Maybe Double)
-> Int -> Maybe Double -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Double)
MVector (PrimState IO) (Maybe Double)
mvec Int
i Maybe Double
forall a. Maybe a
Nothing
else MVector (PrimState IO) (Maybe Double)
-> Int -> Maybe Double -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Double)
MVector (PrimState IO) (Maybe Double)
mvec Int
i (Double -> Maybe Double
forall a. a -> Maybe a
Just (Vector Double
vec Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i))
Vector (Maybe Double) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn (Vector (Maybe Double) -> Column)
-> IO (Vector (Maybe Double)) -> IO Column
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) (Maybe Double) -> IO (Vector (Maybe Double))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze MVector RealWorld (Maybe Double)
MVector (PrimState IO) (Maybe Double)
mvec
freezeGrowingColumn (GrowingText GrowingVector Text
gv IORef [Int]
nullsRef) = do
Vector Text
vec <- GrowingVector Text -> IO (Vector Text)
forall a. GrowingVector a -> IO (Vector a)
freezeGrowingVector GrowingVector Text
gv
[Int]
nulls <- IORef [Int] -> IO [Int]
forall a. IORef a -> IO a
readIORef IORef [Int]
nullsRef
if [Int] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
nulls
then Column -> IO Column
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Column -> IO Column) -> Column -> IO Column
forall a b. (a -> b) -> a -> b
$ Vector Text -> Column
forall a. Columnable a => Vector a -> Column
BoxedColumn Vector Text
vec
else do
let size :: Int
size = Vector Text -> Int
forall a. Vector a -> Int
V.length Vector Text
vec
MVector RealWorld (Maybe Text)
mvec <- Int -> IO (MVector (PrimState IO) (Maybe Text))
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.new Int
size
[Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
if Int
i Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
nulls
then MVector (PrimState IO) (Maybe Text) -> Int -> Maybe Text -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Text)
MVector (PrimState IO) (Maybe Text)
mvec Int
i Maybe Text
forall a. Maybe a
Nothing
else MVector (PrimState IO) (Maybe Text) -> Int -> Maybe Text -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Text)
MVector (PrimState IO) (Maybe Text)
mvec Int
i (Text -> Maybe Text
forall a. a -> Maybe a
Just (Vector Text
vec Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
i))
Vector (Maybe Text) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn (Vector (Maybe Text) -> Column)
-> IO (Vector (Maybe Text)) -> IO Column
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) (Maybe Text) -> IO (Vector (Maybe Text))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze MVector RealWorld (Maybe Text)
MVector (PrimState IO) (Maybe Text)
mvec
writeCsv :: FilePath -> DataFrame -> IO ()
writeCsv :: String -> DataFrame -> IO ()
writeCsv = Char -> String -> DataFrame -> IO ()
writeSeparated Char
','
writeSeparated ::
Char ->
FilePath ->
DataFrame ->
IO ()
writeSeparated :: Char -> String -> DataFrame -> IO ()
writeSeparated Char
c String
filepath DataFrame
df = String -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
filepath IOMode
WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
handle -> do
let (Int
rows, Int
_) = DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df
let headers :: [Text]
headers = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)))
Handle -> Text -> IO ()
TIO.hPutStrLn Handle
handle (Text -> [Text] -> Text
T.intercalate Text
", " [Text]
headers)
[Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. (Int
rows Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
let row :: [Text]
row = DataFrame -> Int -> [Text]
getRowAsText DataFrame
df Int
i
Handle -> Text -> IO ()
TIO.hPutStrLn Handle
handle (Text -> [Text] -> Text
T.intercalate Text
", " [Text]
row)
getRowAsText :: DataFrame -> Int -> [T.Text]
getRowAsText :: DataFrame -> Int -> [Text]
getRowAsText DataFrame
df Int
i = (Int -> Column -> [Text] -> [Text])
-> [Text] -> Vector Column -> [Text]
forall a b. (Int -> a -> b -> b) -> b -> Vector a -> b
V.ifoldr Int -> Column -> [Text] -> [Text]
go [] (DataFrame -> Vector Column
columns DataFrame
df)
where
indexMap :: Map Int Text
indexMap = [(Int, Text)] -> Map Int Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList (((Text, Int) -> (Int, Text)) -> [(Text, Int)] -> [(Int, Text)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
a, Int
b) -> (Int
b, Text
a)) ([(Text, Int)] -> [(Int, Text)]) -> [(Text, Int)] -> [(Int, Text)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df))
go :: Int -> Column -> [Text] -> [Text]
go Int
k (BoxedColumn (Vector a
c :: V.Vector a)) [Text]
acc = case Vector a
c Vector a -> Int -> Maybe a
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
Just a
e -> Text
textRep Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc
where
textRep :: Text
textRep = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
Just a :~: Text
Refl -> a
Text
e
Maybe (a :~: Text)
Nothing -> case forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a of
App TypeRep a
t1 TypeRep b
t2 -> case TypeRep a -> TypeRep Maybe -> Maybe (a :~~: Maybe)
forall k1 k2 (a :: k1) (b :: k2).
TypeRep a -> TypeRep b -> Maybe (a :~~: b)
eqTypeRep TypeRep a
t1 (forall {k} (a :: k). Typeable a => TypeRep a
forall (a :: * -> *). Typeable a => TypeRep a
typeRep @Maybe) of
Just a :~~: Maybe
HRefl -> case TypeRep b -> TypeRep Text -> Maybe (b :~: Text)
forall (a :: k1) (b :: k1).
TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality TypeRep b
t2 (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
Just b :~: Text
Refl -> Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"null" a
Maybe Text
e
Maybe (b :~: Text)
Nothing -> (Text -> Text
fromOptional (Text -> Text) -> (a -> Text) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) a
e
where
fromOptional :: Text -> Text
fromOptional Text
s
| Text -> Text -> Bool
T.isPrefixOf Text
"Just " Text
s = Int -> Text -> Text
T.drop (Text -> Int
T.length Text
"Just ") Text
s
| Bool
otherwise = Text
"null"
Maybe (a :~~: Maybe)
Nothing -> (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) a
e
TypeRep a
_ -> (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) a
e
Maybe a
Nothing ->
String -> [Text]
forall a. HasCallStack => String -> a
error (String -> [Text]) -> String -> [Text]
forall a b. (a -> b) -> a -> b
$
String
"Column "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack (Map Int Text
indexMap Map Int Text -> Int -> Text
forall k a. Ord k => Map k a -> k -> a
M.! Int
k)
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" has less items than "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"the other columns at index "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i
go Int
k (UnboxedColumn Vector a
c) [Text]
acc = case Vector a
c Vector a -> Int -> Maybe a
forall a. Unbox a => Vector a -> Int -> Maybe a
VU.!? Int
i of
Just a
e -> String -> Text
T.pack (a -> String
forall a. Show a => a -> String
show a
e) Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc
Maybe a
Nothing ->
String -> [Text]
forall a. HasCallStack => String -> a
error (String -> [Text]) -> String -> [Text]
forall a b. (a -> b) -> a -> b
$
String
"Column "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack (Map Int Text
indexMap Map Int Text -> Int -> Text
forall k a. Ord k => Map k a -> k -> a
M.! Int
k)
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" has less items than "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"the other columns at index "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i
go Int
k (OptionalColumn (Vector (Maybe a)
c :: V.Vector (Maybe a))) [Text]
acc = case Vector (Maybe a)
c Vector (Maybe a) -> Int -> Maybe (Maybe a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
Just Maybe a
e -> Text
textRep Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc
where
textRep :: Text
textRep = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
Just a :~: Text
Refl -> a -> Maybe a -> a
forall a. a -> Maybe a -> a
fromMaybe a
"Nothing" Maybe a
e
Maybe (a :~: Text)
Nothing -> (String -> Text
T.pack (String -> Text) -> (Maybe a -> String) -> Maybe a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe a -> String
forall a. Show a => a -> String
show) Maybe a
e
Maybe (Maybe a)
Nothing ->
String -> [Text]
forall a. HasCallStack => String -> a
error (String -> [Text]) -> String -> [Text]
forall a b. (a -> b) -> a -> b
$
String
"Column "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack (Map Int Text
indexMap Map Int Text -> Int -> Text
forall k a. Ord k => Map k a -> k -> a
M.! Int
k)
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" has less items than "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"the other columns at index "
String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i