{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.IO.CSV where

import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Char8 as C8
import qualified Data.List as L
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.IO as TIO
import qualified Data.Vector as V
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM

import Control.Applicative (many, (<|>))
import Control.Monad (forM_, unless, zipWithM_)
import Data.Attoparsec.ByteString.Char8 hiding (endOfLine)
import Data.Bits (shiftL)
import Data.Char
import Data.Either
import Data.Function (on)
import Data.Functor
import Data.IORef
import Data.Maybe
import Data.Type.Equality (TestEquality (testEquality))
import DataFrame.Internal.Column (Column (..), columnLength)
import DataFrame.Internal.DataFrame (DataFrame (..))
import DataFrame.Internal.Parsing
import DataFrame.Operations.Typing
import System.IO
import Type.Reflection
import Prelude hiding (concat, takeWhile)

data GrowingVector a = GrowingVector
    { forall a. GrowingVector a -> IORef (IOVector a)
gvData :: !(IORef (VM.IOVector a))
    , forall a. GrowingVector a -> IORef Int
gvSize :: !(IORef Int)
    , forall a. GrowingVector a -> IORef Int
gvCapacity :: !(IORef Int)
    }

data GrowingUnboxedVector a = GrowingUnboxedVector
    { forall a. GrowingUnboxedVector a -> IORef (IOVector a)
guvData :: !(IORef (VUM.IOVector a))
    , forall a. GrowingUnboxedVector a -> IORef Int
guvSize :: !(IORef Int)
    , forall a. GrowingUnboxedVector a -> IORef Int
guvCapacity :: !(IORef Int)
    }

data GrowingColumn
    = GrowingInt !(GrowingUnboxedVector Int) !(IORef [Int])
    | GrowingDouble !(GrowingUnboxedVector Double) !(IORef [Int])
    | GrowingText !(GrowingVector T.Text) !(IORef [Int])

-- | CSV read parameters.
data ReadOptions = ReadOptions
    { ReadOptions -> Bool
hasHeader :: Bool
    -- ^ Whether or not the CSV file has a header. (default: True)
    , ReadOptions -> Bool
inferTypes :: Bool
    -- ^ Whether to try and infer types. (default: True)
    , ReadOptions -> Bool
safeRead :: Bool
    -- ^ Whether to partially parse values into `Maybe`/`Either`. (default: True)
    , ReadOptions -> Int
chunkSize :: Int
    -- ^ Default chunk size (in bytes) for csv reader. (default: 512'000)
    , ReadOptions -> String
dateFormat :: String
    {- ^ Format of date fields as recognized by the Data.Time.Format module.

    __Examples:__

    @
    > parseTimeM True defaultTimeLocale "%Y/%-m/%-d" "2010/3/04" :: Maybe Day
    Just 2010-03-04
    > parseTimeM True defaultTimeLocale "%d/%-m/%-Y" "04/3/2010" :: Maybe Day
    Just 2010-03-04
    @
    -}
    }

defaultReadOptions :: ReadOptions
defaultReadOptions :: ReadOptions
defaultReadOptions =
    ReadOptions
        { hasHeader :: Bool
hasHeader = Bool
True
        , inferTypes :: Bool
inferTypes = Bool
True
        , safeRead :: Bool
safeRead = Bool
True
        , chunkSize :: Int
chunkSize = Int
512_000
        , dateFormat :: String
dateFormat = String
"%Y-%m-%d"
        }

newGrowingVector :: Int -> IO (GrowingVector a)
newGrowingVector :: forall a. Int -> IO (GrowingVector a)
newGrowingVector !Int
initCap = do
    IOVector a
vec <- Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.unsafeNew Int
initCap
    IORef (IOVector a) -> IORef Int -> IORef Int -> GrowingVector a
forall a.
IORef (IOVector a) -> IORef Int -> IORef Int -> GrowingVector a
GrowingVector (IORef (IOVector a) -> IORef Int -> IORef Int -> GrowingVector a)
-> IO (IORef (IOVector a))
-> IO (IORef Int -> IORef Int -> GrowingVector a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IOVector a -> IO (IORef (IOVector a))
forall a. a -> IO (IORef a)
newIORef IOVector a
vec IO (IORef Int -> IORef Int -> GrowingVector a)
-> IO (IORef Int) -> IO (IORef Int -> GrowingVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
0 IO (IORef Int -> GrowingVector a)
-> IO (IORef Int) -> IO (GrowingVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
initCap

newGrowingUnboxedVector :: (VUM.Unbox a) => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector :: forall a. Unbox a => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector !Int
initCap = do
    IOVector a
vec <- Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
VUM.unsafeNew Int
initCap
    IORef (IOVector a)
-> IORef Int -> IORef Int -> GrowingUnboxedVector a
forall a.
IORef (IOVector a)
-> IORef Int -> IORef Int -> GrowingUnboxedVector a
GrowingUnboxedVector (IORef (IOVector a)
 -> IORef Int -> IORef Int -> GrowingUnboxedVector a)
-> IO (IORef (IOVector a))
-> IO (IORef Int -> IORef Int -> GrowingUnboxedVector a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IOVector a -> IO (IORef (IOVector a))
forall a. a -> IO (IORef a)
newIORef IOVector a
vec IO (IORef Int -> IORef Int -> GrowingUnboxedVector a)
-> IO (IORef Int) -> IO (IORef Int -> GrowingUnboxedVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
0 IO (IORef Int -> GrowingUnboxedVector a)
-> IO (IORef Int) -> IO (GrowingUnboxedVector a)
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
newIORef Int
initCap

appendGrowingVector :: GrowingVector a -> a -> IO ()
appendGrowingVector :: forall a. GrowingVector a -> a -> IO ()
appendGrowingVector (GrowingVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
capRef) !a
val = do
    Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
    Int
cap <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
capRef
    IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef

    IOVector a
vec' <-
        if Int
size Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
cap
            then do
                let !newCap :: Int
newCap = Int
cap Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1
                IOVector a
newVec <- MVector (PrimState IO) a -> Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
VM.unsafeGrow IOVector a
MVector (PrimState IO) a
vec Int
newCap
                IORef (IOVector a) -> IOVector a -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef (IOVector a)
vecRef IOVector a
newVec
                IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
capRef Int
newCap
                IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
newVec
            else IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
vec

    MVector (PrimState IO) a -> Int -> a -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.unsafeWrite IOVector a
MVector (PrimState IO) a
vec' Int
size a
val
    IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
sizeRef (Int -> IO ()) -> Int -> IO ()
forall a b. (a -> b) -> a -> b
$! Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

appendGrowingUnboxedVector ::
    (VUM.Unbox a) => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector :: forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector (GrowingUnboxedVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
capRef) !a
val = do
    Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
    Int
cap <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
capRef
    IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef

    IOVector a
vec' <-
        if Int
size Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
cap
            then do
                let !newCap :: Int
newCap = Int
cap Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1
                IOVector a
newVec <- MVector (PrimState IO) a -> Int -> IO (MVector (PrimState IO) a)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m (MVector (PrimState m) a)
VUM.unsafeGrow IOVector a
MVector (PrimState IO) a
vec Int
newCap
                IORef (IOVector a) -> IOVector a -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef (IOVector a)
vecRef IOVector a
newVec
                IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
capRef Int
newCap
                IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
newVec
            else IOVector a -> IO (IOVector a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return IOVector a
vec

    MVector (PrimState IO) a -> Int -> a -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite IOVector a
MVector (PrimState IO) a
vec' Int
size a
val
    IORef Int -> Int -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef Int
sizeRef (Int -> IO ()) -> Int -> IO ()
forall a b. (a -> b) -> a -> b
$! Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

freezeGrowingVector :: GrowingVector a -> IO (V.Vector a)
freezeGrowingVector :: forall a. GrowingVector a -> IO (Vector a)
freezeGrowingVector (GrowingVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
_) = do
    IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef
    Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
    MVector (PrimState IO) a -> IO (Vector a)
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze (Int -> Int -> IOVector a -> IOVector a
forall s a. Int -> Int -> MVector s a -> MVector s a
VM.slice Int
0 Int
size IOVector a
vec)

freezeGrowingUnboxedVector ::
    (VUM.Unbox a) => GrowingUnboxedVector a -> IO (VU.Vector a)
freezeGrowingUnboxedVector :: forall a. Unbox a => GrowingUnboxedVector a -> IO (Vector a)
freezeGrowingUnboxedVector (GrowingUnboxedVector IORef (IOVector a)
vecRef IORef Int
sizeRef IORef Int
_) = do
    IOVector a
vec <- IORef (IOVector a) -> IO (IOVector a)
forall a. IORef a -> IO a
readIORef IORef (IOVector a)
vecRef
    Int
size <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
sizeRef
    MVector (PrimState IO) a -> IO (Vector a)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.freeze (Int -> Int -> IOVector a -> IOVector a
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
VUM.slice Int
0 Int
size IOVector a
vec)

{- | Read CSV file from path and load it into a dataframe.

==== __Example__
@
ghci> D.readCsv ".\/data\/taxi.csv"

@
-}
readCsv :: FilePath -> IO DataFrame
readCsv :: String -> IO DataFrame
readCsv = Char -> ReadOptions -> String -> IO DataFrame
readSeparated Char
',' ReadOptions
defaultReadOptions

{- | Read CSV file from path and load it into a dataframe.

==== __Example__
@
ghci> D.readCsvWithOpts ".\/data\/taxi.csv" (D.defaultReadOptions { dateFormat = "%d/%-m/%-Y" })

@
-}
readCsvWithOpts :: ReadOptions -> FilePath -> IO DataFrame
readCsvWithOpts :: ReadOptions -> String -> IO DataFrame
readCsvWithOpts ReadOptions
opts = Char -> ReadOptions -> String -> IO DataFrame
readSeparated Char
',' ReadOptions
opts

{- | Read TSV (tab separated) file from path and load it into a dataframe.

==== __Example__
@
ghci> D.readTsv ".\/data\/taxi.tsv"

@
-}
readTsv :: FilePath -> IO DataFrame
readTsv :: String -> IO DataFrame
readTsv = Char -> ReadOptions -> String -> IO DataFrame
readSeparated Char
'\t' ReadOptions
defaultReadOptions

{- | Read text file with specified delimiter into a dataframe.

==== __Example__
@
ghci> D.readSeparated ';' D.defaultReadOptions ".\/data\/taxi.txt"

@
-}
readSeparated :: Char -> ReadOptions -> FilePath -> IO DataFrame
readSeparated :: Char -> ReadOptions -> String -> IO DataFrame
readSeparated !Char
sep !ReadOptions
opts !String
path = String -> IOMode -> (Handle -> IO DataFrame) -> IO DataFrame
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
path IOMode
ReadMode ((Handle -> IO DataFrame) -> IO DataFrame)
-> (Handle -> IO DataFrame) -> IO DataFrame
forall a b. (a -> b) -> a -> b
$ \Handle
handle -> do
    Handle -> BufferMode -> IO ()
hSetBuffering Handle
handle (Maybe Int -> BufferMode
BlockBuffering (Int -> Maybe Int
forall a. a -> Maybe a
Just (ReadOptions -> Int
chunkSize ReadOptions
opts)))

    ByteString
firstLine <- Handle -> IO ByteString
C8.hGetLine Handle
handle
    let firstRow :: [ByteString]
firstRow = Char -> ByteString -> [ByteString]
parseLine Char
sep ByteString
firstLine
        columnNames :: [Text]
columnNames =
            if ReadOptions -> Bool
hasHeader ReadOptions
opts
                then (ByteString -> Text) -> [ByteString] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\"') (Text -> Text) -> (ByteString -> Text) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TE.decodeUtf8Lenient) [ByteString]
firstRow
                else (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Char -> Text
T.singleton (Char -> Text) -> (Int -> Char) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
intToDigit) [Int
0 .. [ByteString] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
firstRow Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]

    Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ReadOptions -> Bool
hasHeader ReadOptions
opts) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
handle SeekMode
AbsoluteSeek Integer
0

    ByteString
dataLine <- Handle -> IO ByteString
C8.hGetLine Handle
handle
    let dataRow :: [ByteString]
dataRow = Char -> ByteString -> [ByteString]
parseLine Char
sep ByteString
dataLine
    [GrowingColumn]
growingCols <- [ByteString] -> ReadOptions -> IO [GrowingColumn]
initializeColumns [ByteString]
dataRow ReadOptions
opts

    Int -> [ByteString] -> [GrowingColumn] -> IO ()
processRow Int
0 [ByteString]
dataRow [GrowingColumn]
growingCols

    Handle -> Char -> [GrowingColumn] -> Int -> Int -> IO ()
processFile Handle
handle Char
sep [GrowingColumn]
growingCols (ReadOptions -> Int
chunkSize ReadOptions
opts) Int
1

    Vector Column
frozenCols <- [Column] -> Vector Column
forall a. [a] -> Vector a
V.fromList ([Column] -> Vector Column) -> IO [Column] -> IO (Vector Column)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (GrowingColumn -> IO Column) -> [GrowingColumn] -> IO [Column]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM GrowingColumn -> IO Column
freezeGrowingColumn [GrowingColumn]
growingCols

    let numRows :: Int
numRows = Int -> (Column -> Int) -> Maybe Column -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int
0 Column -> Int
columnLength (Vector Column
frozenCols Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? Int
0)
        df :: DataFrame
df =
            DataFrame
                { columns :: Vector Column
columns = Vector Column
frozenCols
                , columnIndices :: Map Text Int
columnIndices = [(Text, Int)] -> Map Text Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([Text] -> [Int] -> [(Text, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
columnNames [Int
0 ..])
                , dataframeDimensions :: (Int, Int)
dataframeDimensions = (Int
numRows, Vector Column -> Int
forall a. Vector a -> Int
V.length Vector Column
frozenCols)
                }
    DataFrame -> IO DataFrame
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (DataFrame -> IO DataFrame) -> DataFrame -> IO DataFrame
forall a b. (a -> b) -> a -> b
$
        if ReadOptions -> Bool
inferTypes ReadOptions
opts
            then Bool -> String -> DataFrame -> DataFrame
parseDefaults (ReadOptions -> Bool
safeRead ReadOptions
opts) (ReadOptions -> String
dateFormat ReadOptions
opts) DataFrame
df
            else DataFrame
df

initializeColumns :: [BS.ByteString] -> ReadOptions -> IO [GrowingColumn]
initializeColumns :: [ByteString] -> ReadOptions -> IO [GrowingColumn]
initializeColumns [ByteString]
row ReadOptions
opts = (ByteString -> IO GrowingColumn)
-> [ByteString] -> IO [GrowingColumn]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM ByteString -> IO GrowingColumn
initColumn [ByteString]
row
  where
    initColumn :: BS.ByteString -> IO GrowingColumn
    initColumn :: ByteString -> IO GrowingColumn
initColumn ByteString
bs = do
        IORef [Int]
nullsRef <- [Int] -> IO (IORef [Int])
forall a. a -> IO (IORef a)
newIORef []
        let val :: Text
val = ByteString -> Text
TE.decodeUtf8Lenient ByteString
bs
        if ReadOptions -> Bool
inferTypes ReadOptions
opts
            then case Text -> InferredType
inferType Text
val of
                InferredType
IntType -> GrowingUnboxedVector Int -> IORef [Int] -> GrowingColumn
GrowingInt (GrowingUnboxedVector Int -> IORef [Int] -> GrowingColumn)
-> IO (GrowingUnboxedVector Int)
-> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingUnboxedVector Int)
forall a. Unbox a => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef
                InferredType
DoubleType -> GrowingUnboxedVector Double -> IORef [Int] -> GrowingColumn
GrowingDouble (GrowingUnboxedVector Double -> IORef [Int] -> GrowingColumn)
-> IO (GrowingUnboxedVector Double)
-> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingUnboxedVector Double)
forall a. Unbox a => Int -> IO (GrowingUnboxedVector a)
newGrowingUnboxedVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef
                InferredType
TextType -> GrowingVector Text -> IORef [Int] -> GrowingColumn
GrowingText (GrowingVector Text -> IORef [Int] -> GrowingColumn)
-> IO (GrowingVector Text) -> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingVector Text)
forall a. Int -> IO (GrowingVector a)
newGrowingVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef
            else GrowingVector Text -> IORef [Int] -> GrowingColumn
GrowingText (GrowingVector Text -> IORef [Int] -> GrowingColumn)
-> IO (GrowingVector Text) -> IO (IORef [Int] -> GrowingColumn)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO (GrowingVector Text)
forall a. Int -> IO (GrowingVector a)
newGrowingVector Int
1_024 IO (IORef [Int] -> GrowingColumn)
-> IO (IORef [Int]) -> IO GrowingColumn
forall a b. IO (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> IORef [Int] -> IO (IORef [Int])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure IORef [Int]
nullsRef

data InferredType = IntType | DoubleType | TextType

inferType :: T.Text -> InferredType
inferType :: Text -> InferredType
inferType !Text
t
    | Text -> Bool
T.null Text
t = InferredType
TextType
    | Maybe Int -> Bool
forall a. Maybe a -> Bool
isJust (HasCallStack => Text -> Maybe Int
Text -> Maybe Int
readInt Text
t) = InferredType
IntType
    | Maybe Double -> Bool
forall a. Maybe a -> Bool
isJust (HasCallStack => Text -> Maybe Double
Text -> Maybe Double
readDouble Text
t) = InferredType
DoubleType
    | Bool
otherwise = InferredType
TextType

processRow :: Int -> [BS.ByteString] -> [GrowingColumn] -> IO ()
processRow :: Int -> [ByteString] -> [GrowingColumn] -> IO ()
processRow !Int
rowIdx ![ByteString]
vals ![GrowingColumn]
cols = (ByteString -> GrowingColumn -> IO ())
-> [ByteString] -> [GrowingColumn] -> IO ()
forall (m :: * -> *) a b c.
Applicative m =>
(a -> b -> m c) -> [a] -> [b] -> m ()
zipWithM_ (Int -> ByteString -> GrowingColumn -> IO ()
processValue Int
rowIdx) [ByteString]
vals [GrowingColumn]
cols
  where
    processValue :: Int -> BS.ByteString -> GrowingColumn -> IO ()
    processValue :: Int -> ByteString -> GrowingColumn -> IO ()
processValue !Int
idx !ByteString
bs !GrowingColumn
col = do
        let !val :: Text
val = ByteString -> Text
TE.decodeUtf8Lenient ByteString
bs
        case GrowingColumn
col of
            GrowingInt GrowingUnboxedVector Int
gv IORef [Int]
nulls ->
                case HasCallStack => ByteString -> Maybe Int
ByteString -> Maybe Int
readByteStringInt ByteString
bs of
                    Just !Int
i -> GrowingUnboxedVector Int -> Int -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Int
gv Int
i
                    Maybe Int
Nothing -> do
                        GrowingUnboxedVector Int -> Int -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Int
gv Int
0
                        IORef [Int] -> ([Int] -> [Int]) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [Int]
nulls (Int
idx Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
            GrowingDouble GrowingUnboxedVector Double
gv IORef [Int]
nulls ->
                case HasCallStack => ByteString -> Maybe Double
ByteString -> Maybe Double
readByteStringDouble ByteString
bs of
                    Just !Double
d -> GrowingUnboxedVector Double -> Double -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Double
gv Double
d
                    Maybe Double
Nothing -> do
                        GrowingUnboxedVector Double -> Double -> IO ()
forall a. Unbox a => GrowingUnboxedVector a -> a -> IO ()
appendGrowingUnboxedVector GrowingUnboxedVector Double
gv Double
0.0
                        IORef [Int] -> ([Int] -> [Int]) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [Int]
nulls (Int
idx Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
            GrowingText GrowingVector Text
gv IORef [Int]
nulls ->
                if Text -> Bool
isNull Text
val
                    then do
                        GrowingVector Text -> Text -> IO ()
forall a. GrowingVector a -> a -> IO ()
appendGrowingVector GrowingVector Text
gv Text
T.empty
                        IORef [Int] -> ([Int] -> [Int]) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef' IORef [Int]
nulls (Int
idx Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)
                    else GrowingVector Text -> Text -> IO ()
forall a. GrowingVector a -> a -> IO ()
appendGrowingVector GrowingVector Text
gv Text
val

isNull :: T.Text -> Bool
isNull :: Text -> Bool
isNull Text
t = Text -> Bool
T.null Text
t Bool -> Bool -> Bool
|| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"NA" Bool -> Bool -> Bool
|| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"NULL" Bool -> Bool -> Bool
|| Text
t Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"null"

processFile :: Handle -> Char -> [GrowingColumn] -> Int -> Int -> IO ()
processFile :: Handle -> Char -> [GrowingColumn] -> Int -> Int -> IO ()
processFile !Handle
handle !Char
sep ![GrowingColumn]
cols !Int
chunk Int
r = do
    let go :: ByteString -> Int -> IO ()
go ByteString
remain !Int
rowIdx =
            IO ByteString
-> Parser [ByteString] -> ByteString -> IO (Result [ByteString])
forall (m :: * -> *) a.
Monad m =>
m ByteString -> Parser a -> ByteString -> m (Result a)
parseWith (Handle -> Int -> IO ByteString
C8.hGetNonBlocking Handle
handle Int
chunk) (Char -> Parser [ByteString]
parseRow Char
sep) ByteString
remain IO (Result [ByteString]) -> (Result [ByteString] -> IO ()) -> IO ()
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
                Fail ByteString
unconsumed [String]
ctx String
er -> do
                    Integer
erpos <- Handle -> IO Integer
hTell Handle
handle
                    String -> IO ()
forall a. String -> IO a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
                        String
"Failed to parse CSV file around "
                            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Integer -> String
forall a. Show a => a -> String
show Integer
erpos
                            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" byte; due: "
                            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String -> String
forall a. Show a => a -> String
show String
er
                            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"; context: "
                            String -> String -> String
forall a. Semigroup a => a -> a -> a
<> [String] -> String
forall a. Show a => a -> String
show [String]
ctx
                Partial ByteString -> Result [ByteString]
c -> do
                    String -> IO ()
forall a. String -> IO a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Partial handler is called"
                Done (ByteString
unconsumed :: C8.ByteString) ([ByteString]
row :: [C8.ByteString]) -> do
                    Int -> [ByteString] -> [GrowingColumn] -> IO ()
processRow Int
rowIdx [ByteString]
row [GrowingColumn]
cols
                    Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless ([ByteString] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
row Bool -> Bool -> Bool
|| ByteString
unconsumed ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
forall a. Monoid a => a
mempty) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> IO ()
go ByteString
unconsumed (Int -> IO ()) -> Int -> IO ()
forall a b. (a -> b) -> a -> b
$! Int
rowIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
    ByteString -> Int -> IO ()
go ByteString
"" Int
r

parseLine :: Char -> BS.ByteString -> [BS.ByteString]
parseLine :: Char -> ByteString -> [ByteString]
parseLine !Char
sep = [ByteString] -> Either String [ByteString] -> [ByteString]
forall b a. b -> Either a b -> b
fromRight [] (Either String [ByteString] -> [ByteString])
-> (ByteString -> Either String [ByteString])
-> ByteString
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser [ByteString] -> ByteString -> Either String [ByteString]
forall a. Parser a -> ByteString -> Either String a
parseOnly (Char -> Parser [ByteString]
record Char
sep)

parseRow :: Char -> Parser [C8.ByteString]
parseRow :: Char -> Parser [ByteString]
parseRow Char
sep = Char -> Parser [ByteString]
record Char
sep Parser [ByteString] -> Parser ByteString () -> Parser [ByteString]
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser ByteString ()
endOfLine Parser ByteString ()
-> Parser ByteString () -> Parser ByteString ()
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput) Parser [ByteString] -> String -> Parser [ByteString]
forall i a. Parser i a -> String -> Parser i a
<?> String
"CSV row"

record :: Char -> Parser [BS.ByteString]
record :: Char -> Parser [ByteString]
record Char
sep = Char -> Parser ByteString
field Char
sep Parser ByteString -> Parser ByteString Char -> Parser [ByteString]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy` Char -> Parser ByteString Char
char Char
sep Parser [ByteString] -> String -> Parser [ByteString]
forall i a. Parser i a -> String -> Parser i a
<?> String
"CSV record"

field :: Char -> Parser BS.ByteString
field :: Char -> Parser ByteString
field Char
sep = Parser ByteString
quotedField Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser ByteString
unquotedField Char
sep Parser ByteString -> String -> Parser ByteString
forall i a. Parser i a -> String -> Parser i a
<?> String
"CSV field"

unquotedField :: Char -> Parser BS.ByteString
unquotedField :: Char -> Parser ByteString
unquotedField Char
sep = (Char -> Bool) -> Parser ByteString
takeWhile (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
sep Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r') Parser ByteString -> String -> Parser ByteString
forall i a. Parser i a -> String -> Parser i a
<?> String
"unquoted field"

quotedField :: Parser BS.ByteString
quotedField :: Parser ByteString
quotedField =
    do
        Char -> Parser ByteString Char
char Char
'"'
        LazyByteString
contents <- Builder -> LazyByteString
Builder.toLazyByteString (Builder -> LazyByteString)
-> Parser ByteString Builder -> Parser ByteString LazyByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Builder
parseQuotedContents
        Char -> Parser ByteString Char
char Char
'"'
        ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Parser ByteString)
-> ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ LazyByteString -> ByteString
BS.toStrict LazyByteString
contents
        Parser ByteString -> String -> Parser ByteString
forall i a. Parser i a -> String -> Parser i a
<?> String
"quoted field"
  where
    parseQuotedContents :: Parser ByteString Builder
parseQuotedContents = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> Parser ByteString [Builder] -> Parser ByteString Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Builder -> Parser ByteString [Builder]
forall a. Parser ByteString a -> Parser ByteString [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser ByteString Builder
quotedChar
    quotedChar :: Parser ByteString Builder
quotedChar =
        ByteString -> Builder
Builder.byteString (ByteString -> Builder)
-> Parser ByteString -> Parser ByteString Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'"')
            Parser ByteString Builder
-> Parser ByteString Builder -> Parser ByteString Builder
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ((Char -> Parser ByteString Char
char Char
'"' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
char Char
'"') Parser ByteString Char -> Builder -> Parser ByteString Builder
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
Data.Functor.$> Char -> Builder
Builder.char8 Char
'"')
            Parser ByteString Builder -> String -> Parser ByteString Builder
forall i a. Parser i a -> String -> Parser i a
<?> String
"quoted field content"

endOfLine :: Parser ()
endOfLine :: Parser ByteString ()
endOfLine =
    (Parser ByteString -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ByteString -> Parser ByteString
string ByteString
"\r\n") Parser ByteString ()
-> Parser ByteString () -> Parser ByteString ()
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser ByteString Char
char Char
'\n') Parser ByteString ()
-> Parser ByteString () -> Parser ByteString ()
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser ByteString Char
char Char
'\r'))
        Parser ByteString () -> String -> Parser ByteString ()
forall i a. Parser i a -> String -> Parser i a
<?> String
"line ending"

freezeGrowingColumn :: GrowingColumn -> IO Column
freezeGrowingColumn :: GrowingColumn -> IO Column
freezeGrowingColumn (GrowingInt GrowingUnboxedVector Int
gv IORef [Int]
nullsRef) = do
    Vector Int
vec <- GrowingUnboxedVector Int -> IO (Vector Int)
forall a. Unbox a => GrowingUnboxedVector a -> IO (Vector a)
freezeGrowingUnboxedVector GrowingUnboxedVector Int
gv
    [Int]
nulls <- IORef [Int] -> IO [Int]
forall a. IORef a -> IO a
readIORef IORef [Int]
nullsRef
    if [Int] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
nulls
        then Column -> IO Column
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Column -> IO Column) -> Column -> IO Column
forall a b. (a -> b) -> a -> b
$ Vector Int -> Column
forall a. (Columnable a, Unbox a) => Vector a -> Column
UnboxedColumn Vector Int
vec
        else do
            let size :: Int
size = Vector Int -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Int
vec
            MVector RealWorld (Maybe Int)
mvec <- Int -> IO (MVector (PrimState IO) (Maybe Int))
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.new Int
size
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
                if Int
i Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
nulls
                    then MVector (PrimState IO) (Maybe Int) -> Int -> Maybe Int -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Int)
MVector (PrimState IO) (Maybe Int)
mvec Int
i Maybe Int
forall a. Maybe a
Nothing
                    else MVector (PrimState IO) (Maybe Int) -> Int -> Maybe Int -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Int)
MVector (PrimState IO) (Maybe Int)
mvec Int
i (Int -> Maybe Int
forall a. a -> Maybe a
Just (Vector Int
vec Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i))
            Vector (Maybe Int) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn (Vector (Maybe Int) -> Column)
-> IO (Vector (Maybe Int)) -> IO Column
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) (Maybe Int) -> IO (Vector (Maybe Int))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze MVector RealWorld (Maybe Int)
MVector (PrimState IO) (Maybe Int)
mvec
freezeGrowingColumn (GrowingDouble GrowingUnboxedVector Double
gv IORef [Int]
nullsRef) = do
    Vector Double
vec <- GrowingUnboxedVector Double -> IO (Vector Double)
forall a. Unbox a => GrowingUnboxedVector a -> IO (Vector a)
freezeGrowingUnboxedVector GrowingUnboxedVector Double
gv
    [Int]
nulls <- IORef [Int] -> IO [Int]
forall a. IORef a -> IO a
readIORef IORef [Int]
nullsRef
    if [Int] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
nulls
        then Column -> IO Column
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Column -> IO Column) -> Column -> IO Column
forall a b. (a -> b) -> a -> b
$ Vector Double -> Column
forall a. (Columnable a, Unbox a) => Vector a -> Column
UnboxedColumn Vector Double
vec
        else do
            let size :: Int
size = Vector Double -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Double
vec
            MVector RealWorld (Maybe Double)
mvec <- Int -> IO (MVector (PrimState IO) (Maybe Double))
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.new Int
size
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
                if Int
i Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
nulls
                    then MVector (PrimState IO) (Maybe Double)
-> Int -> Maybe Double -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Double)
MVector (PrimState IO) (Maybe Double)
mvec Int
i Maybe Double
forall a. Maybe a
Nothing
                    else MVector (PrimState IO) (Maybe Double)
-> Int -> Maybe Double -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Double)
MVector (PrimState IO) (Maybe Double)
mvec Int
i (Double -> Maybe Double
forall a. a -> Maybe a
Just (Vector Double
vec Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i))
            Vector (Maybe Double) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn (Vector (Maybe Double) -> Column)
-> IO (Vector (Maybe Double)) -> IO Column
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) (Maybe Double) -> IO (Vector (Maybe Double))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze MVector RealWorld (Maybe Double)
MVector (PrimState IO) (Maybe Double)
mvec
freezeGrowingColumn (GrowingText GrowingVector Text
gv IORef [Int]
nullsRef) = do
    Vector Text
vec <- GrowingVector Text -> IO (Vector Text)
forall a. GrowingVector a -> IO (Vector a)
freezeGrowingVector GrowingVector Text
gv
    [Int]
nulls <- IORef [Int] -> IO [Int]
forall a. IORef a -> IO a
readIORef IORef [Int]
nullsRef
    if [Int] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Int]
nulls
        then Column -> IO Column
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Column -> IO Column) -> Column -> IO Column
forall a b. (a -> b) -> a -> b
$ Vector Text -> Column
forall a. Columnable a => Vector a -> Column
BoxedColumn Vector Text
vec
        else do
            let size :: Int
size = Vector Text -> Int
forall a. Vector a -> Int
V.length Vector Text
vec
            MVector RealWorld (Maybe Text)
mvec <- Int -> IO (MVector (PrimState IO) (Maybe Text))
forall (m :: * -> *) a.
PrimMonad m =>
Int -> m (MVector (PrimState m) a)
VM.new Int
size
            [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. Int
size Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
                if Int
i Int -> [Int] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
nulls
                    then MVector (PrimState IO) (Maybe Text) -> Int -> Maybe Text -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Text)
MVector (PrimState IO) (Maybe Text)
mvec Int
i Maybe Text
forall a. Maybe a
Nothing
                    else MVector (PrimState IO) (Maybe Text) -> Int -> Maybe Text -> IO ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.write MVector RealWorld (Maybe Text)
MVector (PrimState IO) (Maybe Text)
mvec Int
i (Text -> Maybe Text
forall a. a -> Maybe a
Just (Vector Text
vec Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
i))
            Vector (Maybe Text) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn (Vector (Maybe Text) -> Column)
-> IO (Vector (Maybe Text)) -> IO Column
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) (Maybe Text) -> IO (Vector (Maybe Text))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.freeze MVector RealWorld (Maybe Text)
MVector (PrimState IO) (Maybe Text)
mvec

writeCsv :: FilePath -> DataFrame -> IO ()
writeCsv :: String -> DataFrame -> IO ()
writeCsv = Char -> String -> DataFrame -> IO ()
writeSeparated Char
','

writeSeparated ::
    -- | Separator
    Char ->
    -- | Path to write to
    FilePath ->
    DataFrame ->
    IO ()
writeSeparated :: Char -> String -> DataFrame -> IO ()
writeSeparated Char
c String
filepath DataFrame
df = String -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
filepath IOMode
WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
handle -> do
    let (Int
rows, Int
_) = DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df
    let headers :: [Text]
headers = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)))
    Handle -> Text -> IO ()
TIO.hPutStrLn Handle
handle (Text -> [Text] -> Text
T.intercalate Text
", " [Text]
headers)
    [Int] -> (Int -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0 .. (Int
rows Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)] ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> do
        let row :: [Text]
row = DataFrame -> Int -> [Text]
getRowAsText DataFrame
df Int
i
        Handle -> Text -> IO ()
TIO.hPutStrLn Handle
handle (Text -> [Text] -> Text
T.intercalate Text
", " [Text]
row)

getRowAsText :: DataFrame -> Int -> [T.Text]
getRowAsText :: DataFrame -> Int -> [Text]
getRowAsText DataFrame
df Int
i = (Int -> Column -> [Text] -> [Text])
-> [Text] -> Vector Column -> [Text]
forall a b. (Int -> a -> b -> b) -> b -> Vector a -> b
V.ifoldr Int -> Column -> [Text] -> [Text]
go [] (DataFrame -> Vector Column
columns DataFrame
df)
  where
    indexMap :: Map Int Text
indexMap = [(Int, Text)] -> Map Int Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList (((Text, Int) -> (Int, Text)) -> [(Text, Int)] -> [(Int, Text)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Text
a, Int
b) -> (Int
b, Text
a)) ([(Text, Int)] -> [(Int, Text)]) -> [(Text, Int)] -> [(Int, Text)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df))
    go :: Int -> Column -> [Text] -> [Text]
go Int
k (BoxedColumn (Vector a
c :: V.Vector a)) [Text]
acc = case Vector a
c Vector a -> Int -> Maybe a
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
        Just a
e -> Text
textRep Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc
          where
            textRep :: Text
textRep = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
                Just a :~: Text
Refl -> a
Text
e
                Maybe (a :~: Text)
Nothing -> case forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a of
                    App TypeRep a
t1 TypeRep b
t2 -> case TypeRep a -> TypeRep Maybe -> Maybe (a :~~: Maybe)
forall k1 k2 (a :: k1) (b :: k2).
TypeRep a -> TypeRep b -> Maybe (a :~~: b)
eqTypeRep TypeRep a
t1 (forall {k} (a :: k). Typeable a => TypeRep a
forall (a :: * -> *). Typeable a => TypeRep a
typeRep @Maybe) of
                        Just a :~~: Maybe
HRefl -> case TypeRep b -> TypeRep Text -> Maybe (b :~: Text)
forall (a :: k1) (b :: k1).
TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality TypeRep b
t2 (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
                            Just b :~: Text
Refl -> Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"null" a
Maybe Text
e
                            Maybe (b :~: Text)
Nothing -> (Text -> Text
fromOptional (Text -> Text) -> (a -> Text) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) a
e
                              where
                                fromOptional :: Text -> Text
fromOptional Text
s
                                    | Text -> Text -> Bool
T.isPrefixOf Text
"Just " Text
s = Int -> Text -> Text
T.drop (Text -> Int
T.length Text
"Just ") Text
s
                                    | Bool
otherwise = Text
"null"
                        Maybe (a :~~: Maybe)
Nothing -> (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) a
e
                    TypeRep a
_ -> (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) a
e
        Maybe a
Nothing ->
            String -> [Text]
forall a. HasCallStack => String -> a
error (String -> [Text]) -> String -> [Text]
forall a b. (a -> b) -> a -> b
$
                String
"Column "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack (Map Int Text
indexMap Map Int Text -> Int -> Text
forall k a. Ord k => Map k a -> k -> a
M.! Int
k)
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" has less items than "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"the other columns at index "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i
    go Int
k (UnboxedColumn Vector a
c) [Text]
acc = case Vector a
c Vector a -> Int -> Maybe a
forall a. Unbox a => Vector a -> Int -> Maybe a
VU.!? Int
i of
        Just a
e -> String -> Text
T.pack (a -> String
forall a. Show a => a -> String
show a
e) Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc
        Maybe a
Nothing ->
            String -> [Text]
forall a. HasCallStack => String -> a
error (String -> [Text]) -> String -> [Text]
forall a b. (a -> b) -> a -> b
$
                String
"Column "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack (Map Int Text
indexMap Map Int Text -> Int -> Text
forall k a. Ord k => Map k a -> k -> a
M.! Int
k)
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" has less items than "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"the other columns at index "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i
    go Int
k (OptionalColumn (Vector (Maybe a)
c :: V.Vector (Maybe a))) [Text]
acc = case Vector (Maybe a)
c Vector (Maybe a) -> Int -> Maybe (Maybe a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
        Just Maybe a
e -> Text
textRep Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc
          where
            textRep :: Text
textRep = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
                Just a :~: Text
Refl -> a -> Maybe a -> a
forall a. a -> Maybe a -> a
fromMaybe a
"Nothing" Maybe a
e
                Maybe (a :~: Text)
Nothing -> (String -> Text
T.pack (String -> Text) -> (Maybe a -> String) -> Maybe a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe a -> String
forall a. Show a => a -> String
show) Maybe a
e
        Maybe (Maybe a)
Nothing ->
            String -> [Text]
forall a. HasCallStack => String -> a
error (String -> [Text]) -> String -> [Text]
forall a b. (a -> b) -> a -> b
$
                String
"Column "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack (Map Int Text
indexMap Map Int Text -> Int -> Text
forall k a. Ord k => Map k a -> k -> a
M.! Int
k)
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" has less items than "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"the other columns at index "
                    String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i