{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Core where
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Map.Strict as MS
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import Control.Exception (throw)
import Data.Either
import Data.Function (on, (&))
import Data.Maybe
import Data.Type.Equality (TestEquality (..))
import DataFrame.Errors
import DataFrame.Internal.Column (
    Column (..),
    Columnable,
    columnLength,
    columnTypeString,
    expandColumn,
    fromList,
    fromVector,
 )
import DataFrame.Internal.DataFrame (DataFrame (..), empty, getColumn)
import DataFrame.Internal.Parsing (isNullish)
import DataFrame.Internal.Row (Any, mkColumnFromRow)
import Type.Reflection
import Prelude hiding (null)
-- | Dimensions of the dataframe as a @(rows, columns)@ pair.
--
-- This simply reads the 'dataframeDimensions' field stored on the frame.
dimensions :: DataFrame -> (Int, Int)
dimensions = dataframeDimensions
{-# INLINE dimensions #-}
-- | Names of all columns, ordered by the index each name maps to in
-- 'columnIndices' (i.e. positional order rather than alphabetical order).
columnNames :: DataFrame -> [T.Text]
columnNames = map fst . L.sortOn snd . M.toList . columnIndices
{-# INLINE columnNames #-}
-- | Add a boxed vector to the dataframe under the given name.
--
-- The vector is converted with 'fromVector' and handed to 'insertColumn',
-- so the replace-or-append behaviour is that of 'insertColumn'.
insertVector ::
    forall a.
    (Columnable a) =>
    -- | Column name
    T.Text ->
    -- | Values of the new column
    V.Vector a ->
    -- | Dataframe to add the column to
    DataFrame ->
    DataFrame
insertVector name xs = insertColumn name (fromVector xs)
{-# INLINE insertVector #-}
-- | Add a boxed vector to the dataframe, padding it at the end with a
-- default value until it is as long as the dataframe's current row count.
--
-- A vector that is already at least as long as the row count is inserted
-- unchanged (no padding is added).
insertVectorWithDefault ::
    forall a.
    (Columnable a) =>
    -- | Default value used for padding
    a ->
    -- | Column name
    T.Text ->
    -- | Values of the new column
    V.Vector a ->
    -- | Dataframe to add the column to
    DataFrame ->
    DataFrame
insertVectorWithDefault defaultValue name xs d =
    insertColumn name (fromVector padded) d
  where
    (rows, _) = dataframeDimensions d
    -- Never negative: nothing is appended when xs already covers every row.
    missing = max 0 (rows - V.length xs)
    padded = xs V.++ V.replicate missing defaultValue
-- | Add an unboxed vector to the dataframe under the given name.
--
-- The vector is wrapped directly in 'UnboxedColumn' and handed to
-- 'insertColumn'.
insertUnboxedVector ::
    forall a.
    (Columnable a, VU.Unbox a) =>
    -- | Column name
    T.Text ->
    -- | Unboxed values of the new column
    VU.Vector a ->
    -- | Dataframe to add the column to
    DataFrame ->
    DataFrame
insertUnboxedVector name xs = insertColumn name (UnboxedColumn xs)
-- | Insert a column into the dataframe.
--
-- * If a column called @name@ already exists, it is replaced at its existing
--   index and the column count is unchanged.
-- * Otherwise the column is appended and registered under the current column
--   count as its index.
--
-- The resulting row count is the larger of the new column's length and the
-- existing row count, and every column is passed through 'expandColumn' with
-- that row count (presumably padding shorter columns -- confirm against
-- 'expandColumn').
insertColumn ::
    -- | Column name
    T.Text ->
    -- | Column to add
    Column ->
    -- | Dataframe to add the column to
    DataFrame ->
    DataFrame
insertColumn name column d =
    case M.lookup name (columnIndices d) of
        Just i ->
            DataFrame
                (resize (columns d V.// [(i, column)]))
                (columnIndices d)
                (n, c)
        Nothing ->
            DataFrame
                (resize (columns d `V.snoc` column))
                (M.insert name c (columnIndices d))
                (n, c + 1)
  where
    (r, c) = dataframeDimensions d
    -- Row count after insertion.
    n = max (columnLength column) r
    resize = V.map (expandColumn n)
-- | Copy the column named @original@ into the dataframe under the name @new@.
--
-- Throws 'ColumnNotFoundException' (tagged with @"cloneColumn"@ and the list
-- of known column names) when @original@ is not present.
cloneColumn :: T.Text -> T.Text -> DataFrame -> DataFrame
cloneColumn original new df =
    case getColumn original df of
        Just column -> insertColumn new column df
        Nothing ->
            throw $
                ColumnNotFoundException
                    original
                    "cloneColumn"
                    (M.keys $ columnIndices df)
-- | Rename the column @orig@ to @new@.
--
-- This is the throwing counterpart of 'renameSafe': any 'DataFrameException'
-- it reports is raised with 'throw'.
rename :: T.Text -> T.Text -> DataFrame -> DataFrame
rename orig new df = either throw id (renameSafe orig new df)
-- | Apply a list of @(old, new)@ renames, left to right, using 'rename'.
renameMany :: [(T.Text, T.Text)] -> DataFrame -> DataFrame
renameMany = fold (uncurry rename)
-- | Rename the column @orig@ to @new@ without throwing.
--
-- Returns 'Left' with a 'ColumnNotFoundException' (tagged @"rename"@) when
-- @orig@ is not a column of the dataframe. Only the name-to-index map is
-- touched; the column data itself is left as is.
--
-- NOTE(review): if @new@ is already the name of a different column, its
-- entry in the index map is overwritten by this rename.
renameSafe ::
    T.Text -> T.Text -> DataFrame -> Either DataFrameException DataFrame
renameSafe orig new df =
    case M.lookup orig indices of
        Nothing ->
            Left $ ColumnNotFoundException orig "rename" (M.keys indices)
        Just columnIndex ->
            Right df{columnIndices = M.insert new columnIndex (M.delete orig indices)}
  where
    indices = columnIndices df
-- | Per-column summary row, as assembled by 'describeColumns'.
data ColumnInfo = ColumnInfo
    { nameOfColumn :: !T.Text
    -- ^ Name of the column
    , nonNullValues :: !Int
    -- ^ Number of values counted as non-null
    , nullValues :: !Int
    -- ^ Number of values counted as null
    , typeOfColumn :: !T.Text
    -- ^ Textual rendering of the column's element type
    }
-- | Build a dataframe with one row per named column of the input and the
-- columns @"Column Name"@, @"# Non-null Values"@, @"# Null Values"@ and
-- @"Type"@. Rows are ordered by ascending non-null count.
--
-- Null counting only applies to 'OptionalColumn's (via 'nulls'); boxed and
-- unboxed columns are reported with zero nulls. Stored columns whose index
-- has no name in 'columnIndices' are skipped.
describeColumns :: DataFrame -> DataFrame
describeColumns df =
    empty
        & insertColumn "Column Name" (fromList (map nameOfColumn infos))
        & insertColumn "# Non-null Values" (fromList (map nonNullValues infos))
        & insertColumn "# Null Values" (fromList (map nullValues infos))
        & insertColumn "Type" (fromList (map typeOfColumn infos))
  where
    infos :: [ColumnInfo]
    infos = L.sortOn nonNullValues (V.ifoldl' go [] (columns df))

    -- Reverse of 'columnIndices': position in the column vector -> name.
    indexMap :: M.Map Int T.Text
    indexMap = M.fromList [(i, name) | (name, i) <- M.toList (columnIndices df)]

    columnName :: Int -> Maybe T.Text
    columnName i = M.lookup i indexMap

    -- One clause per column representation; they differ only in how the
    -- null count and the type name are obtained.
    go :: [ColumnInfo] -> Int -> Column -> [ColumnInfo]
    go acc i col@(OptionalColumn (_ :: V.Vector a)) =
        addInfo acc i col (nulls col) (T.pack $ show $ typeRep @a)
    go acc i col@(BoxedColumn (_ :: V.Vector a)) =
        addInfo acc i col 0 (T.pack $ show $ typeRep @a)
    go acc i col@(UnboxedColumn _) =
        addInfo acc i col 0 (T.pack $ columnTypeString col)

    -- Prepend the summary for a column, unless its index has no name.
    addInfo :: [ColumnInfo] -> Int -> Column -> Int -> T.Text -> [ColumnInfo]
    addInfo acc i col missing columnType =
        case columnName i of
            Nothing -> acc
            Just cname ->
                ColumnInfo cname (columnLength col - missing) missing columnType
                    : acc
-- | Count the values in a column that are considered null.
--
-- * 'OptionalColumn': the number of 'Nothing' entries.
-- * 'BoxedColumn' of 'T.Text': entries for which 'isNullish' holds.
-- * 'BoxedColumn' of 'String': entries for which 'isNullish' holds after
--   packing to 'T.Text'.
-- * 'BoxedColumn' whose element type is @Maybe x@: the number of 'Nothing'
--   entries.
-- * Anything else: 0.
nulls :: Column -> Int
nulls (OptionalColumn xs) = VG.length $ VG.filter isNothing xs
nulls (BoxedColumn (xs :: V.Vector a)) =
    case testEquality (typeRep @a) (typeRep @T.Text) of
        Just Refl -> VG.length $ VG.filter isNullish xs
        Nothing -> case testEquality (typeRep @a) (typeRep @String) of
            Just Refl -> VG.length $ VG.filter (isNullish . T.pack) xs
            -- Inspect the runtime type for an application of 'Maybe'.
            Nothing -> case typeRep @a of
                App t1 _ -> case eqTypeRep t1 (typeRep @Maybe) of
                    Just HRefl -> VG.length $ VG.filter isNothing xs
                    Nothing -> 0
                _ -> 0
nulls _ = 0
-- | Count the 'Left' entries of a boxed column whose element type is an
-- @Either l r@. Every other column yields 0.
partiallyParsed :: Column -> Int
partiallyParsed (BoxedColumn (xs :: V.Vector a)) =
    case typeRep @a of
        -- Element type is some two-argument constructor; check it is 'Either'.
        App (App tycon _) _ -> case eqTypeRep tycon (typeRep @Either) of
            Just HRefl -> VG.length $ VG.filter isLeft xs
            Nothing -> 0
        _ -> 0
partiallyParsed _ = 0
-- | Build a dataframe by inserting the given @(name, column)@ pairs, in list
-- order, into 'empty' with 'insertColumn'.
--
-- Because 'insertColumn' replaces on a name clash, a later pair with the same
-- name overrides an earlier one.
fromNamedColumns :: [(T.Text, Column)] -> DataFrame
fromNamedColumns =
    L.foldl' (\df (name, column) -> insertColumn name column df) empty
-- | Build a dataframe from columns without names; each column is named after
-- its zero-based position in the list (@"0"@, @"1"@, ...).
fromUnnamedColumns :: [Column] -> DataFrame
fromUnnamedColumns = fromNamedColumns . zip positionalNames
  where
    positionalNames = map (T.pack . show) [(0 :: Int) ..]
-- | Build a dataframe from a list of column names and a list of rows.
--
-- For the i-th name, the column is produced by @'mkColumnFromRow' i rows@ and
-- inserted under that name; names are processed left to right.
fromRows :: [T.Text] -> [[Any]] -> DataFrame
fromRows names rows =
    L.foldl'
        (\df (i, name) -> insertColumn name (mkColumnFromRow i rows) df)
        empty
        -- Pair each name with its position instead of indexing with (!!).
        (zip [0 ..] names)
-- | Count the occurrences of each distinct value in a column, returned in
-- ascending order of the value.
--
-- The requested element type @a@ must match the type stored in the column.
-- For an 'OptionalColumn' that stored type is @Maybe x@, so @a@ has to be
-- the @Maybe@ type as well.
--
-- Throws 'ColumnNotFoundException' when the column does not exist and
-- 'TypeMismatchException' when @a@ differs from the stored element type.
valueCounts :: forall a. (Columnable a) => T.Text -> DataFrame -> [(a, Int)]
valueCounts columnName df = case getColumn columnName df of
    Nothing ->
        throw $
            ColumnNotFoundException
                columnName
                "valueCounts"
                (M.keys $ columnIndices df)
    Just (BoxedColumn (column' :: V.Vector c)) ->
        case typeRep @a `testEquality` typeRep @c of
            Nothing -> throw (mismatch (typeRep @c))
            Just Refl -> M.toAscList (V.foldl' tally M.empty column')
    Just (OptionalColumn (column' :: V.Vector c)) ->
        case typeRep @a `testEquality` typeRep @c of
            Nothing -> throw (mismatch (typeRep @c))
            Just Refl -> M.toAscList (V.foldl' tally M.empty column')
    Just (UnboxedColumn (column' :: VU.Vector c)) ->
        case typeRep @a `testEquality` typeRep @c of
            Nothing -> throw (mismatch (typeRep @c))
            -- Fold the unboxed vector directly; no boxed copy is needed.
            Just Refl -> M.toAscList (VU.foldl' tally M.empty column')
  where
    -- Bump the counter for one observed value.
    tally :: (Ord k) => M.Map k Int -> k -> M.Map k Int
    tally m v = MS.insertWith (+) v (1 :: Int) m

    -- Error describing the requested type @a@ versus the stored type.
    mismatch :: forall c. (Typeable c) => TypeRep c -> DataFrameException
    mismatch stored =
        TypeMismatchException
            ( MkTypeErrorContext
                { userType = Right (typeRep @a)
                , expectedType = Right stored
                , errorColumnName = Just (T.unpack columnName)
                , callingFunctionName = Just "valueCounts"
                }
            )
-- | Apply a dataframe transformation once per list element, left to right,
-- threading the dataframe through as a strict accumulator.
--
-- @fold f [x1, x2] df == f x2 (f x1 df)@
fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame
fold f xs acc = L.foldl' (flip f) acc xs