{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Core where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Map.Strict as MS
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU

import Control.Exception (throw)
import Data.Either
import Data.Function (on, (&))
import Data.Maybe
import Data.Type.Equality (TestEquality (..))
import DataFrame.Errors
import DataFrame.Internal.Column (
    Column (..),
    Columnable,
    columnLength,
    columnTypeString,
    expandColumn,
    fromList,
    fromVector,
 )
import DataFrame.Internal.DataFrame (DataFrame (..), empty, getColumn)
import DataFrame.Internal.Parsing (isNullish)
import DataFrame.Internal.Row (Any, mkColumnFromRow)
import Type.Reflection
import Prelude hiding (null)

{- | O(1) Get DataFrame dimensions i.e. (rows, columns)

==== __Example__
@
ghci> D.dimensions df

(100, 3)
@
-}
dimensions :: DataFrame -> (Int, Int)
-- Plain projection of the 'dataframeDimensions' record field; nothing is
-- recomputed from the columns.
dimensions = dataframeDimensions
{-# INLINE dimensions #-}

{- | O(k) Get column names of the DataFrame in order of insertion.

==== __Example__
@
ghci> D.columnNames df

["col_a", "col_b", "col_c"]
@
-}
columnNames :: DataFrame -> [T.Text]
-- 'columnIndices' maps each name to its position in the column vector, so
-- ordering the (name, position) pairs by position gives the names in column
-- order. 'L.sortOn' is stable, exactly like the 'L.sortBy' it replaces.
columnNames = map fst . L.sortOn snd . M.toList . columnIndices
{-# INLINE columnNames #-}

{- | Adds a vector to the dataframe. If the vector has fewer elements than the dataframe and the dataframe is not empty
the vector is converted to type `Maybe a` filled with `Nothing` to match the size of the dataframe. Similarly,
if the vector has more elements than what's currently in the dataframe, the other columns in the dataframe are
changed to `Maybe <Type>` and filled with `Nothing`.

==== __Example__
@
ghci> import qualified Data.Vector as V

ghci> D.insertVector "numbers" (V.fromList [1..10]) D.empty

---------------
index | numbers
------|--------
 Int  |   Int
------|--------
0     | 1
1     | 2
2     | 3
3     | 4
4     | 5
5     | 6
6     | 7
7     | 8
8     | 9
9     | 10

@
-}
insertVector ::
    forall a.
    (Columnable a) =>
    -- | Column Name
    T.Text ->
    -- | Vector to add to column
    V.Vector a ->
    -- | DataFrame to add column to
    DataFrame ->
    DataFrame
-- Wrap the boxed vector with 'fromVector' and delegate to 'insertColumn',
-- which handles both replacing an existing column and appending a new one.
insertVector name xs = insertColumn name (fromVector xs)
{-# INLINE insertVector #-}

{- | /O(k)/ Add a column to the dataframe providing a default.
This constructs a new vector and also may convert it
to an unboxed vector if necessary. Since columns are usually
large the runtime is dominated by the length of the list, k.
-}
insertVectorWithDefault ::
    forall a.
    (Columnable a) =>
    -- | Default Value
    a ->
    -- | Column name
    T.Text ->
    -- | Data to add to column
    V.Vector a ->
    -- | DataFrame to add the column to
    DataFrame ->
    DataFrame
insertVectorWithDefault defaultValue name xs d =
    insertColumn name (fromVector padded) d
  where
    (rows, _) = dataframeDimensions d
    -- Number of trailing slots to fill with the default so the vector is as
    -- long as the dataframe's current row count.
    -- NOTE(review): when xs is already longer than the dataframe this is
    -- negative; 'V.replicate' is expected to yield an empty vector then.
    missing = rows - V.length xs
    padded = xs V.++ V.replicate missing defaultValue

{- | /O(n)/ Adds an unboxed vector to the dataframe.

Same as insertVector but takes an unboxed vector. If you insert a vector of numbers through insertVector it will either way be converted
into an unboxed vector so this function saves that extra work/conversion.
-}
insertUnboxedVector ::
    forall a.
    (Columnable a, VU.Unbox a) =>
    -- | Column Name
    T.Text ->
    -- | Unboxed vector to add to column
    VU.Vector a ->
    -- | DataFrame to add the column to
    DataFrame ->
    DataFrame
-- The vector is wrapped directly in the 'UnboxedColumn' constructor; no
-- conversion through 'fromVector' takes place.
insertUnboxedVector name xs = insertColumn name (UnboxedColumn xs)

{- | /O(n)/ Add a column to the dataframe.

==== __Example__
@
ghci> D.insertColumn "numbers" (D.fromList [1..10]) D.empty

---------------
index | numbers
------|--------
 Int  |   Int
------|--------
0     | 1
1     | 2
2     | 3
3     | 4
4     | 5
5     | 6
6     | 7
7     | 8
8     | 9
9     | 10

@
-}
insertColumn ::
    -- | Column Name
    T.Text ->
    -- | Column to add
    Column ->
    -- | DataFrame to add the column to
    DataFrame ->
    DataFrame
insertColumn name column d =
    -- Every column (old and new) is passed through 'expandColumn' with the
    -- new row count so that all columns end up with the same length.
    DataFrame (V.map (expandColumn n) cols) indices (n, width)
  where
    (r, c) = dataframeDimensions d
    -- Row count after insertion: the longer of the new column and the frame.
    n = max (columnLength column) r
    (cols, indices, width) = case M.lookup name (columnIndices d) of
        -- Name already present: overwrite that slot, keep the index map and
        -- the column count as they are.
        Just i -> (columns d V.// [(i, column)], columnIndices d, c)
        -- New name: append at position c and register it in the index map.
        Nothing ->
            ( columns d `V.snoc` column
            , M.insert name c (columnIndices d)
            , c + 1
            )

{- | /O(n)/ Clones a column and places it under a new name in the dataframe.

==== __Example__
@
ghci> import qualified Data.Vector as V

ghci> df = insertVector "numbers" (V.fromList [1..10]) D.empty

ghci> D.cloneColumn "numbers" "others" df

------------------------
index | numbers | others
------|---------|-------
 Int  |   Int   |  Int
------|---------|-------
0     | 1       | 1
1     | 2       | 2
2     | 3       | 3
3     | 4       | 4
4     | 5       | 5
5     | 6       | 6
6     | 7       | 7
7     | 8       | 8
8     | 9       | 9
9     | 10      | 10

@
-}
cloneColumn :: T.Text -> T.Text -> DataFrame -> DataFrame
cloneColumn original new df = case getColumn original df of
    -- The same 'Column' value is inserted under the new name.
    Just column -> insertColumn new column df
    -- Unknown source column: raise the same exception the other column
    -- operations use, listing the names that do exist.
    Nothing ->
        throw $
            ColumnNotFoundException original "cloneColumn" (M.keys (columnIndices df))

{- | /O(n)/ Renames a single column.

==== __Example__
@
ghci> import qualified Data.Vector as V

ghci> df = insertVector "numbers" (V.fromList [1..10]) D.empty

ghci> D.rename "numbers" "others" df

--------------
index | others
------|-------
 Int  |  Int
------|-------
0     | 1
1     | 2
2     | 3
3     | 4
4     | 5
5     | 6
6     | 7
7     | 8
8     | 9
9     | 10

@
-}
rename :: T.Text -> T.Text -> DataFrame -> DataFrame
-- Throwing wrapper around 'renameSafe': a 'Left' exception is raised with
-- 'throw', a 'Right' dataframe is returned unchanged.
rename orig new df = either throw id (renameSafe orig new df)

{- | /O(n)/ Renames many columns.

==== __Example__
@
ghci> import qualified Data.Vector as V

ghci> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)

ghci> df

------------------------
index | numbers | others
------|---------|-------
 Int  |   Int   |  Int
------|---------|-------
0     | 1       | 11
1     | 2       | 12
2     | 3       | 13
3     | 4       | 14
4     | 5       | 15
5     | 6       | 16
6     | 7       | 17
7     | 8       | 18
8     | 9       | 19
9     | 10      | 20

ghci> D.renameMany [("numbers", "first_10"), ("others", "next_10")] df

--------------------------
index | first_10 | next_10
------|----------|--------
 Int  |   Int    |   Int
------|----------|--------
0     | 1        | 11
1     | 2        | 12
2     | 3        | 13
3     | 4        | 14
4     | 5        | 15
5     | 6        | 16
6     | 7        | 17
7     | 8        | 18
8     | 9        | 19
9     | 10       | 20

@
-}
renameMany :: [(T.Text, T.Text)] -> DataFrame -> DataFrame
-- Each (old, new) pair is turned into a 'rename' step and the steps are
-- threaded through the dataframe with 'fold'.
-- NOTE(review): 'fold' is not defined in the visible part of this file; the
-- order in which pairs are applied depends on it — confirm if it matters.
renameMany = fold (uncurry rename)

{- | Renames a single column without throwing.

Returns 'Left' with a 'ColumnNotFoundException' when @orig@ is not a column of
the dataframe. On success only the name-to-index map is changed; the column
data itself is left untouched.
-}
renameSafe ::
    T.Text -> T.Text -> DataFrame -> Either DataFrameException DataFrame
renameSafe orig new df = case M.lookup orig indices of
    Nothing -> Left (ColumnNotFoundException orig "rename" (M.keys indices))
    Just columnIndex ->
        -- Drop the old key first, then point the new key at the same slot.
        -- NOTE(review): if `new` already names a different column, its entry
        -- is overwritten here while the column vector and the stored
        -- dimensions are unchanged — confirm this is intended.
        let renamed = M.insert new columnIndex (M.delete orig indices)
         in Right df{columnIndices = renamed}
  where
    indices = columnIndices df

-- | Per-column summary record used to build the output of 'describeColumns'.
data ColumnInfo = ColumnInfo
    { nameOfColumn :: !T.Text
    -- ^ Name of the column being summarised.
    , nonNullValues :: !Int
    -- ^ Number of entries that are not counted as null.
    , nullValues :: !Int
    -- ^ Number of entries counted as null.
    , partiallyParsedValues :: !Int
    -- ^ Number of entries counted as partially parsed.
    , uniqueValues :: !Int
    -- ^ Number of distinct values in the column.
    , typeOfColumn :: !T.Text
    -- ^ Textual rendering of the column's element type.
    }

{- | O(n * k ^ 2) Summarises every column of the dataframe: its name, the number of non-null, null, partially parsed and unique values, and its type.

==== __Example__
@
ghci> import qualified Data.Vector as V

ghci> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)

ghci> D.describeColumns df

-----------------------------------------------------------------------------------------------------
index | Column Name | # Non-null Values | # Null Values | # Partially parsed | # Unique Values | Type
------|-------------|-------------------|---------------|--------------------|-----------------|-----
 Int  |    Text     |        Int        |      Int      |        Int         |       Int       | Text
------|-------------|-------------------|---------------|--------------------|-----------------|-----
0     | others      | 10                | 0             | 0                  | 10              | Int
1     | numbers     | 10                | 0             | 0                  | 10              | Int

@
-}
describeColumns :: DataFrame -> DataFrame
describeColumns df =
    -- One output column per 'ColumnInfo' field, one output row per input column.
    empty
        & insertColumn "Column Name" (fromList (map nameOfColumn infos))
        & insertColumn "# Non-null Values" (fromList (map nonNullValues infos))
        & insertColumn "# Null Values" (fromList (map nullValues infos))
        & insertColumn "# Partially parsed" (fromList (map partiallyParsedValues infos))
        & insertColumn "# Unique Values" (fromList (map uniqueValues infos))
        & insertColumn "Type" (fromList (map typeOfColumn infos))
  where
    -- Summaries for all named columns, ordered by ascending non-null count.
    infos :: [ColumnInfo]
    infos = L.sortBy (compare `on` nonNullValues) (V.ifoldl' go [] (columns df))

    -- Reverse of 'columnIndices': position in the column vector -> name.
    indexMap :: M.Map Int T.Text
    indexMap = M.fromList [(ix, nm) | (nm, ix) <- M.toList (columnIndices df)]

    -- Fold step: slots in the column vector that no name points at are
    -- skipped, every other slot contributes one summary.
    go :: [ColumnInfo] -> Int -> Column -> [ColumnInfo]
    go acc i col = case M.lookup i indexMap of
        Nothing -> acc
        Just cname -> summarise cname col : acc

    -- Build the summary for one column; the statistics depend on how the
    -- column is stored.
    summarise :: T.Text -> Column -> ColumnInfo
    summarise cname col@(OptionalColumn (c :: V.Vector a)) =
        let countNulls = nulls col
         in ColumnInfo
                cname
                (columnLength col - countNulls)
                countNulls
                (partiallyParsed col)
                (distinct c)
                (T.pack (show (typeRep @a)))
    summarise cname col@(BoxedColumn (c :: V.Vector a)) =
        -- The null count is reported as 0 for boxed columns here.
        ColumnInfo
            cname
            (columnLength col)
            0
            (partiallyParsed col)
            (distinct c)
            (T.pack (show (typeRep @a)))
    summarise cname col@(UnboxedColumn c) =
        -- Unboxed columns cannot have nulls since Maybe
        -- is not an instance of Unbox a
        ColumnInfo
            cname
            (columnLength col)
            0
            0
            (distinct c)
            (T.pack (columnTypeString col))

    -- Number of distinct elements, found by inserting everything into a set.
    distinct :: (VG.Vector v e, Ord e) => v e -> Int
    distinct = S.size . VG.foldr S.insert S.empty

{- | Counts the entries of a column that are treated as null.

* 'OptionalColumn': the number of 'Nothing' entries.
* 'BoxedColumn' of 'T.Text': entries for which 'isNullish' holds.
* 'BoxedColumn' of 'String': entries whose packed form satisfies 'isNullish'.
* 'BoxedColumn' of @Maybe b@: the number of 'Nothing' entries.
* Anything else: 0.
-}
nulls :: Column -> Int
nulls (OptionalColumn xs) = VG.length (VG.filter isNothing xs)
nulls (BoxedColumn (xs :: V.Vector a)) =
    -- The element type is existential, so it is recovered at runtime by
    -- comparing its 'TypeRep' against the types that can encode a null.
    case testEquality (typeRep @a) (typeRep @T.Text) of
        Just Refl -> VG.length (VG.filter isNullish xs)
        Nothing -> case testEquality (typeRep @a) (typeRep @String) of
            Just Refl -> VG.length (VG.filter (isNullish . T.pack) xs)
            Nothing -> case typeRep @a of
                -- A type application whose head is 'Maybe', i.e. @Maybe b@.
                App t1 _ -> case eqTypeRep t1 (typeRep @Maybe) of
                    Just HRefl -> VG.length (VG.filter isNothing xs)
                    Nothing -> 0
                _ -> 0
nulls _ = 0

{- | Counts the 'Left' entries of a boxed column whose element type is an
'Either'. Every other kind of column yields 0.
-}
partiallyParsed :: Column -> Int
partiallyParsed (BoxedColumn (xs :: V.Vector a)) =
    case typeRep @a of
        -- A doubly-applied type constructor; check whether that constructor
        -- is 'Either' so the elements can be inspected with 'isLeft'.
        App (App tycon _) _ -> case eqTypeRep tycon (typeRep @Either) of
            Just HRefl -> VG.length (VG.filter isLeft xs)
            Nothing -> 0
        _ -> 0
partiallyParsed _ = 0

{- | Creates a dataframe from a list of tuples with name and column.

==== __Example__
@
ghci> df = D.fromNamedColumns [("numbers", D.fromList [1..10]), ("others", D.fromList [11..20])]

ghci> df

------------------------
index | numbers | others
------|---------|-------
 Int  |   Int   |  Int
------|---------|-------
0     | 1       | 11
1     | 2       | 12
2     | 3       | 13
3     | 4       | 14
4     | 5       | 15
5     | 6       | 16
6     | 7       | 17
7     | 8       | 18
8     | 9       | 19
9     | 10      | 20

@
-}
fromNamedColumns :: [(T.Text, Column)] -> DataFrame
-- Strict left fold from the empty dataframe: pairs are inserted in list
-- order, so a name that occurs twice ends up holding the later column.
fromNamedColumns = L.foldl' step empty
  where
    step df (name, column) = insertColumn name column df

{- | Create a dataframe from a list of columns. The column names are "0", "1"... etc.
Useful for quick exploration but you should probably always rename the columns after
or drop the ones you don't want.

==== __Example__
@
ghci> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]

ghci> df

-----------------
index |  0  |  1
------|-----|----
 Int  | Int | Int
------|-----|----
0     | 1   | 11
1     | 2   | 12
2     | 3   | 13
3     | 4   | 14
4     | 5   | 15
5     | 6   | 16
6     | 7   | 17
7     | 8   | 18
8     | 9   | 19
9     | 10  | 20

@
-}
fromUnnamedColumns :: [Column] -> DataFrame
-- Pair each column with its position rendered as text ("0", "1", ...) and
-- build the dataframe through 'fromNamedColumns'. The counter is typed
-- explicitly so it does not rely on defaulting to 'Integer'.
fromUnnamedColumns = fromNamedColumns . zip positionalNames
  where
    positionalNames = map (T.pack . show) [0 :: Int ..]

{- | Create a dataframe from a list of column names and rows.

One column is inserted per name, in the order the names are given. The column
for the name at position @i@ is built by calling 'mkColumnFromRow' with @i@ and
the full list of rows.

==== __Example__
@
ghci> df = D.fromRows ["A", "B"] [[D.toAny 1, D.toAny 11], [D.toAny 2, D.toAny 12], [D.toAny 3, D.toAny 13]]

ghci> df

-----------------
index |  A  |  B
------|-----|----
 Int  | Int | Int
------|-----|----
0     | 1   | 11
1     | 2   | 12
2     | 3   | 13

@
-}
fromRows :: [T.Text] -> [[Any]] -> DataFrame
fromRows names rows = L.foldl' addColumn empty (zip [0 ..] names)
  where
    -- Pairing each name with its index up front avoids the partial '!!' and
    -- the repeated list traversal it would cost for every column.
    addColumn :: DataFrame -> (Int, T.Text) -> DataFrame
    addColumn df (i, name) = insertColumn name (mkColumnFromRow i rows) df

{- | O(n log n) Counts the occurrences of each value in a given column.

The counts are accumulated in a 'M.Map' keyed by the column's values, so the
result is sorted by value in ascending order.

The type argument @a@ must match the element type stored in the column. For an
optional column the stored element type is a @Maybe@ type, so that is what @a@
has to be (e.g. @Maybe Int@).

Throws 'ColumnNotFoundException' if the column does not exist and
'TypeMismatchException' if @a@ is not the column's element type.

==== __Example__
@
ghci> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]

ghci> D.valueCounts @Int "0" df

[(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1)]

@
-}
valueCounts :: forall a. (Columnable a) => T.Text -> DataFrame -> [(a, Int)]
valueCounts columnName df = case getColumn columnName df of
    Nothing ->
        throw $
            ColumnNotFoundException columnName "valueCounts" (M.keys $ columnIndices df)
    Just (BoxedColumn column') ->
        countsAs (V.foldl' tally M.empty column')
    Just (OptionalColumn column') ->
        countsAs (V.foldl' tally M.empty column')
    -- Fold the unboxed vector directly instead of first copying it into a
    -- boxed vector.
    Just (UnboxedColumn column') ->
        countsAs (VU.foldl' tally M.empty column')
  where
    -- Record one more occurrence of a value.
    tally :: (Ord k) => M.Map k Int -> k -> M.Map k Int
    tally counts value = MS.insertWith (+) value 1 counts

    -- Check that the requested type @a@ is the column's element type @c@ and,
    -- if so, return the counts as an ascending association list. The map is
    -- only forced once the types are known to agree.
    countsAs :: forall c. (Typeable c) => M.Map c Int -> [(a, Int)]
    countsAs counts = case testEquality (typeRep @a) (typeRep @c) of
        Nothing ->
            throw $
                TypeMismatchException
                    ( MkTypeErrorContext
                        { userType = Right $ typeRep @a
                        , expectedType = Right $ typeRep @c
                        , errorColumnName = Just (T.unpack columnName)
                        , callingFunctionName = Just "valueCounts"
                        }
                    )
        Just Refl -> M.toAscList counts

{- | A strict left fold over a list that threads a dataframe through each step.
The dataframe is the last argument, which makes it easier to chain operations.

The step function is applied to the elements of the list from left to right;
each application receives the dataframe produced by the previous one, starting
with the dataframe passed in.

==== __Example__
@
ghci> D.fold (const id) [1..5] df

-----------------
index |  0  |  1
------|-----|----
 Int  | Int | Int
------|-----|----
0     | 1   | 11
1     | 2   | 12
2     | 3   | 13
3     | 4   | 14
4     | 5   | 15
5     | 6   | 16
6     | 7   | 17
7     | 8   | 18
8     | 9   | 19
9     | 10  | 20

@
-}
fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame
fold step items initial =
    -- 'L.foldl'' wants the accumulator first, so swap the step's arguments.
    L.foldl' (\frame item -> step item frame) initial items