{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Core where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Map.Strict as MS
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU

import Control.Exception ( throw )
import DataFrame.Errors
import DataFrame.Internal.Column ( Column(..), fromVector, fromList, columnLength, columnTypeString, expandColumn, Columnable)
import DataFrame.Internal.DataFrame (DataFrame(..), getColumn, null, empty)
import DataFrame.Internal.Parsing (isNullish)
import Data.Either
import Data.Function (on, (&))
import Data.Maybe
import Data.Type.Equality (type (:~:)(Refl), TestEquality(..))
import Type.Reflection
import Prelude hiding (null)

-- | /O(1)/ Get the DataFrame dimensions as @(rows, columns)@.
--
-- This is just the 'dataframeDimensions' field stored on the frame; nothing
-- is recomputed from the columns.
dimensions :: DataFrame -> (Int, Int)
dimensions = dataframeDimensions
{-# INLINE dimensions #-}

-- | /O(k log k)/ Get the column names of the DataFrame, ordered by the
-- index each name maps to in 'columnIndices'.
--
-- The name/index pairs are read out of the map, sorted on the index, and
-- the names are projected out.
columnNames :: DataFrame -> [T.Text]
columnNames = map fst . L.sortBy (compare `on` snd) . M.toList . columnIndices
{-# INLINE columnNames #-}

-- | /O(n)/ Adds a vector to the dataframe.
--
-- The vector is turned into a 'Column' with 'fromVector' and then handed to
-- 'insertColumn', so the replace-or-append behaviour is whatever
-- 'insertColumn' does for the given name.
insertVector ::
  forall a.
  Columnable a =>
  -- | Column Name
  T.Text ->
  -- | Vector to add to column
  V.Vector a ->
  -- | DataFrame to add column to
  DataFrame ->
  DataFrame
insertVector name xs = insertColumn name (fromVector xs)
{-# INLINE insertVector #-}

-- | Copies the column named @original@ into a column named @new@.
--
-- The copy is added with 'insertColumn'. Throws a 'ColumnNotFoundException'
-- (tagged with @"cloneColumn"@ and the names currently in 'columnIndices')
-- when 'getColumn' cannot find @original@.
cloneColumn :: T.Text -> T.Text -> DataFrame -> DataFrame
cloneColumn original new df = case getColumn original df of
  Just column -> insertColumn new column df
  Nothing ->
    throw $ ColumnNotFoundException original "cloneColumn" (M.keys (columnIndices df))

-- | /O(n)/ Adds an unboxed vector to the dataframe.
--
-- The vector is wrapped directly in the 'UnboxedColumn' constructor (no
-- conversion step) and passed to 'insertColumn'.
insertUnboxedVector ::
  forall a.
  (Columnable a, VU.Unbox a) =>
  -- | Column Name
  T.Text ->
  -- | Unboxed vector to add to column
  VU.Vector a ->
  -- | DataFrame to add to column
  DataFrame ->
  DataFrame
insertUnboxedVector name xs = insertColumn name (UnboxedColumn xs)

-- | /O(n)/ Add a column to the dataframe. Not meant for external use.
--
-- The new row count is the larger of the incoming column's length and the
-- frame's current row count; every column of the result (including the new
-- one) is passed through 'expandColumn' with that count.
--
-- * If @name@ is already present in 'columnIndices', the column stored at
--   that index is overwritten and the column count is unchanged.
-- * Otherwise the column is appended, @name@ is registered under the
--   current column count, and the column count grows by one.
insertColumn ::
  -- | Column Name
  T.Text ->
  -- | Column to add
  Column ->
  -- | DataFrame to add to column
  DataFrame ->
  DataFrame
insertColumn name column d =
  case M.lookup name indices of
    Just i ->
      DataFrame (resize (columns d V.// [(i, column)])) indices (n, c)
    Nothing ->
      DataFrame (resize (columns d `V.snoc` column)) (M.insert name c indices) (n, c + 1)
  where
    (r, c) = dataframeDimensions d
    indices = columnIndices d
    -- Row count of the resulting frame.
    n = max (columnLength column) r
    -- Bring every column to the new row count.
    resize = V.map (expandColumn n)

-- | /O(k)/ Add a column to the dataframe providing a default.
-- This constructs a new vector and also may convert it
-- to an unboxed vector if necessary. Since columns are usually
-- large the runtime is dominated by the length of the list, k.
--
-- The supplied vector is extended with @rows - length xs@ copies of the
-- default value, where @rows@ is the frame's current row count, before
-- being inserted with 'insertColumn'. When the vector is already at least
-- that long the replicate count is non-positive and no padding is expected
-- (relies on 'V.replicate' yielding an empty vector for such counts).
insertVectorWithDefault ::
  forall a.
  (Columnable a) =>
  -- | Default Value
  a ->
  -- | Column name
  T.Text ->
  -- | Data to add to column
  V.Vector a ->
  -- | DataFrame to add to column
  DataFrame ->
  DataFrame
insertVectorWithDefault defaultValue name xs d =
  insertColumn name (fromVector padded) d
  where
    (rows, _) = dataframeDimensions d
    -- Input followed by enough defaults to reach the current row count.
    padded = xs V.++ V.replicate (rows - V.length xs) defaultValue

-- | Renames the column @orig@ to @new@.
--
-- Throwing variant of 'renameSafe': a 'Left' result is re-raised with
-- 'throw', a 'Right' result is returned as is.
rename :: T.Text -> T.Text -> DataFrame -> DataFrame
rename orig new df = case renameSafe orig new df of
  Left err -> throw err
  Right renamed -> renamed

-- | Applies 'rename' once for every @(old, new)@ pair in the list.
--
-- The pairs are threaded through the frame with 'fold'; the order in which
-- they are applied is determined by that helper (defined elsewhere in this
-- module). Any failure surfaces as the exception thrown by 'rename'.
renameMany :: [(T.Text, T.Text)] -> DataFrame -> DataFrame
renameMany replacements df = fold (uncurry rename) replacements df

-- | Renames the column @orig@ to @new@ without throwing.
--
-- Only the name-to-index map is touched: the entry for @orig@ is removed
-- and @new@ is inserted pointing at the same index, so the column keeps its
-- position and data.
--
-- Returns 'Left' with a 'ColumnNotFoundException' (tagged @"rename"@ and
-- carrying the current column names) when @orig@ is not present.
--
-- NOTE(review): if @new@ is already a column name, its index entry is
-- overwritten here while the column vector and dimensions are left alone;
-- confirm whether callers rely on that.
renameSafe :: T.Text -> T.Text -> DataFrame -> Either DataFrameException DataFrame
renameSafe orig new df = case M.lookup orig indices of
  Nothing -> Left $ ColumnNotFoundException orig "rename" (M.keys indices)
  Just columnIndex ->
    Right df { columnIndices = M.insert new columnIndex (M.delete orig indices) }
  where
    indices = columnIndices df

-- | Get the number of elements in a given column.
--
-- Returns 'Nothing' when 'getColumn' does not find a column with that name,
-- otherwise 'Just' the column's 'columnLength'.
columnSize :: T.Text -> DataFrame -> Maybe Int
columnSize name df = fmap columnLength (getColumn name df)

-- | Per-column summary produced while building 'describeColumns'.
-- All fields are strict.
data ColumnInfo = ColumnInfo {
    -- | Name the column is registered under.
    nameOfColumn :: !T.Text,
    -- | Number of values not counted as null.
    nonNullValues :: !Int,
    -- | Number of values counted as null.
    nullValues :: !Int,
    -- | Number of values counted as partially parsed.
    partiallyParsedValues :: !Int,
    -- | Number of distinct values.
    uniqueValues :: !Int,
    -- | Textual rendering of the column's element type.
    typeOfColumn :: !T.Text
  }

-- | Builds a summary DataFrame with one row per named column of the input.
--
-- The result has the columns @"Column Name"@, @"# Non-null Values"@,
-- @"# Null Values"@, @"# Partially parsed"@, @"# Unique Values"@ and
-- @"Type"@. Rows are sorted by ascending non-null count.
--
-- How the counts are obtained per constructor:
--
-- * 'OptionalColumn': nulls come from 'nulls', partial parses from
--   'partiallyParsed'; non-null is the column length minus the nulls.
-- * 'BoxedColumn': nulls are reported as 0, partial parses come from
--   'partiallyParsed'.
-- * 'UnboxedColumn': nulls and partial parses are both reported as 0.
--
-- Unique values are counted by inserting every element into a 'S.Set'.
-- A column position that has no entry in 'columnIndices' is skipped.
--
-- NOTE(review): only the three constructors above are matched; confirm no
-- other 'Column' constructor can reach this function.
describeColumns :: DataFrame -> DataFrame
describeColumns df = empty & insertColumn "Column Name" (fromList (map nameOfColumn infos))
                      & insertColumn "# Non-null Values" (fromList (map nonNullValues infos))
                      & insertColumn "# Null Values" (fromList (map nullValues infos))
                      & insertColumn "# Partially parsed" (fromList (map partiallyParsedValues infos))
                      & insertColumn "# Unique Values" (fromList (map uniqueValues infos))
                      & insertColumn "Type" (fromList (map typeOfColumn infos))
  where
    -- One summary per named column, ordered by non-null count.
    infos :: [ColumnInfo]
    infos = L.sortBy (compare `on` nonNullValues) (V.ifoldl' go [] (columns df))

    -- Inverse of 'columnIndices': column position -> column name.
    indexMap :: M.Map Int T.Text
    indexMap = M.fromList [(ix, name) | (name, ix) <- M.toList (columnIndices df)]

    columnName :: Int -> Maybe T.Text
    columnName i = M.lookup i indexMap

    -- Fold step: summarise the column at position @i@ and prepend it.
    go :: [ColumnInfo] -> Int -> Column -> [ColumnInfo]
    go acc i col@(OptionalColumn (c :: V.Vector a)) =
      let countNulls = nulls col
       in addInfo acc i $ \cname ->
            ColumnInfo cname (columnLength col - countNulls) countNulls
              (partiallyParsed col) (distinct c) (T.pack $ show $ typeRep @a)
    go acc i col@(BoxedColumn (c :: V.Vector a)) =
      addInfo acc i $ \cname ->
        ColumnInfo cname (columnLength col) 0
          (partiallyParsed col) (distinct c) (T.pack $ show $ typeRep @a)
    -- Unboxed columns cannot have nulls since Maybe
    -- is not an instance of Unbox a
    go acc i col@(UnboxedColumn c) =
      addInfo acc i $ \cname ->
        ColumnInfo cname (columnLength col) 0 0
          (distinct c) (T.pack $ columnTypeString col)

    -- Prepend the summary built by @mk@ unless position @i@ has no name.
    addInfo :: [ColumnInfo] -> Int -> (T.Text -> ColumnInfo) -> [ColumnInfo]
    addInfo acc i mk = case columnName i of
      Nothing -> acc
      Just cname -> mk cname : acc

    -- Number of distinct elements in a vector.
    distinct :: (VG.Vector v e, Ord e) => v e -> Int
    distinct = S.size . VG.foldr S.insert S.empty

-- | Counts the values of a column that are treated as null.
--
-- * 'OptionalColumn': the number of 'Nothing' entries.
-- * 'BoxedColumn' of 'T.Text': entries for which 'isNullish' holds.
-- * 'BoxedColumn' of 'String': entries whose packed text is 'isNullish'.
-- * 'BoxedColumn' of @Maybe b@: the number of 'Nothing' entries.
-- * Any other boxed element type, and any other constructor: 0.
--
-- The element type of a boxed column is inspected at runtime through its
-- 'TypeRep'.
nulls :: Column -> Int
nulls column = case column of
  OptionalColumn xs -> countWhere isNothing xs
  BoxedColumn (xs :: V.Vector a)
    | Just Refl <- testEquality (typeRep @a) (typeRep @T.Text) ->
        countWhere isNullish xs
    | Just Refl <- testEquality (typeRep @a) (typeRep @String) ->
        countWhere (isNullish . T.pack) xs
    -- Element type is an application @f b@; check whether @f@ is 'Maybe'.
    | App t1 _ <- typeRep @a
    , Just HRefl <- eqTypeRep t1 (typeRep @Maybe) ->
        countWhere isNothing xs
    | otherwise -> 0
  _ -> 0
  where
    -- Number of elements satisfying the predicate.
    countWhere :: (e -> Bool) -> V.Vector e -> Int
    countWhere p = V.length . V.filter p

-- | Counts the 'Left' entries of a boxed column whose element type is an
-- @Either@.
--
-- The element type is inspected at runtime: it must be a two-argument type
-- application whose constructor is 'Either'. Every other element type, and
-- every other column constructor, yields 0.
partiallyParsed :: Column -> Int
partiallyParsed (BoxedColumn (xs :: V.Vector a))
  | App (App tycon _) _ <- typeRep @a
  , Just HRefl <- eqTypeRep tycon (typeRep @Either) =
      V.length (V.filter isLeft xs)
partiallyParsed _ = 0

-- | Builds a DataFrame from @(name, column)@ pairs.
--
-- Starting from 'empty', each pair is added left to right with
-- 'insertColumn', so a name that occurs more than once ends up holding the
-- last column supplied for it.
fromNamedColumns :: [(T.Text, Column)] -> DataFrame
fromNamedColumns = L.foldl' addColumn empty
  where
    addColumn df (name, column) = insertColumn name column df

-- | Builds a DataFrame from columns without names.
--
-- The columns are named after their position in the list, rendered as
-- decimal text (@"0"@, @"1"@, ...), and then passed to 'fromNamedColumns'.
fromUnnamedColumns :: [Column] -> DataFrame
fromUnnamedColumns = fromNamedColumns . zip positionalNames
  where
    positionalNames = map (T.pack . show) [(0 :: Integer) ..]

-- | O (k * n) Counts the occurrences of each value in a given column.
valueCounts :: forall a. (Columnable a) => T.Text -> DataFrame -> [(a, Int)]
valueCounts :: forall a. Columnable a => Text -> DataFrame -> [(a, Int)]
valueCounts Text
columnName DataFrame
df = case Text -> DataFrame -> Maybe Column
getColumn Text
columnName DataFrame
df of
      Maybe Column
Nothing -> DataFrameException -> [(a, Int)]
forall a e. Exception e => e -> a
throw (DataFrameException -> [(a, Int)])
-> DataFrameException -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
columnName Text
"valueCounts" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
      Just (BoxedColumn (Vector a
column' :: V.Vector c)) ->
        let
          column :: Map a Int
column = (Map a Int -> a -> Map a Int) -> Map a Int -> Vector a -> Map a Int
forall a b. (a -> b -> a) -> a -> Vector b -> a
V.foldl' (\Map a Int
m a
v -> (Int -> Int -> Int) -> a -> Int -> Map a Int -> Map a Int
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
MS.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) a
v (Int
1 :: Int) Map a Int
m) Map a Int
forall k a. Map k a
M.empty Vector a
column'
        in case (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
`testEquality` (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @c) of
              Maybe (a :~: a)
Nothing -> DataFrameException -> [(a, Int)]
forall a e. Exception e => e -> a
throw (DataFrameException -> [(a, Int)])
-> DataFrameException -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ TypeErrorContext a a -> DataFrameException
forall a b.
(Typeable a, Typeable b) =>
TypeErrorContext a b -> DataFrameException
TypeMismatchException (MkTypeErrorContext
                                                          { userType :: Either String (TypeRep a)
userType = TypeRep a -> Either String (TypeRep a)
forall a b. b -> Either a b
Right (TypeRep a -> Either String (TypeRep a))
-> TypeRep a -> Either String (TypeRep a)
forall a b. (a -> b) -> a -> b
$ forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a
                                                          , expectedType :: Either String (TypeRep a)
expectedType = TypeRep a -> Either String (TypeRep a)
forall a b. b -> Either a b
Right (TypeRep a -> Either String (TypeRep a))
-> TypeRep a -> Either String (TypeRep a)
forall a b. (a -> b) -> a -> b
$ forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @c
                                                          , errorColumnName :: Maybe String
errorColumnName = String -> Maybe String
forall a. a -> Maybe a
Just (Text -> String
T.unpack Text
columnName)
                                                          , callingFunctionName :: Maybe String
callingFunctionName = String -> Maybe String
forall a. a -> Maybe a
Just String
"valueCounts"
                                                          })
              Just a :~: a
Refl -> Map a Int -> [(a, Int)]
forall k a. Map k a -> [(k, a)]
M.toAscList Map a Int
Map a Int
column
      Just (OptionalColumn (Vector (Maybe a)
column' :: V.Vector c)) ->
        let
          column :: Map (Maybe a) Int
column = (Map (Maybe a) Int -> Maybe a -> Map (Maybe a) Int)
-> Map (Maybe a) Int -> Vector (Maybe a) -> Map (Maybe a) Int
forall a b. (a -> b -> a) -> a -> Vector b -> a
V.foldl' (\Map (Maybe a) Int
m Maybe a
v -> (Int -> Int -> Int)
-> Maybe a -> Int -> Map (Maybe a) Int -> Map (Maybe a) Int
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
MS.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Maybe a
v (Int
1 :: Int) Map (Maybe a) Int
m) Map (Maybe a) Int
forall k a. Map k a
M.empty Vector (Maybe a)
column'
        in case (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) TypeRep a -> TypeRep (Maybe a) -> Maybe (a :~: Maybe a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
`testEquality` (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @c) of
              Maybe (a :~: Maybe a)
Nothing -> DataFrameException -> [(a, Int)]
forall a e. Exception e => e -> a
throw (DataFrameException -> [(a, Int)])
-> DataFrameException -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ TypeErrorContext a (Maybe a) -> DataFrameException
forall a b.
(Typeable a, Typeable b) =>
TypeErrorContext a b -> DataFrameException
TypeMismatchException (MkTypeErrorContext
                                                          { userType :: Either String (TypeRep a)
userType = TypeRep a -> Either String (TypeRep a)
forall a b. b -> Either a b
Right (TypeRep a -> Either String (TypeRep a))
-> TypeRep a -> Either String (TypeRep a)
forall a b. (a -> b) -> a -> b
$ forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a
                                                          , expectedType :: Either String (TypeRep (Maybe a))
expectedType = TypeRep (Maybe a) -> Either String (TypeRep (Maybe a))
forall a b. b -> Either a b
Right (TypeRep (Maybe a) -> Either String (TypeRep (Maybe a)))
-> TypeRep (Maybe a) -> Either String (TypeRep (Maybe a))
forall a b. (a -> b) -> a -> b
$ forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @c
                                                          , errorColumnName :: Maybe String
errorColumnName = String -> Maybe String
forall a. a -> Maybe a
Just (Text -> String
T.unpack Text
columnName)
                                                          , callingFunctionName :: Maybe String
callingFunctionName = String -> Maybe String
forall a. a -> Maybe a
Just String
"valueCounts"
                                                          })
              Just a :~: Maybe a
Refl -> Map a Int -> [(a, Int)]
forall k a. Map k a -> [(k, a)]
M.toAscList Map a Int
Map (Maybe a) Int
column
      Just (UnboxedColumn (Vector a
column' :: VU.Vector c)) -> let
          column :: Map a Int
column = (Map a Int -> a -> Map a Int) -> Map a Int -> Vector a -> Map a Int
forall a b. (a -> b -> a) -> a -> Vector b -> a
V.foldl' (\Map a Int
m a
v -> (Int -> Int -> Int) -> a -> Int -> Map a Int -> Map a Int
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
MS.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) a
v (Int
1 :: Int) Map a Int
m) Map a Int
forall k a. Map k a
M.empty (Vector a -> Vector a
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
V.convert Vector a
column')
        in case (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
`testEquality` (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @c) of
          Maybe (a :~: a)
Nothing -> DataFrameException -> [(a, Int)]
forall a e. Exception e => e -> a
throw (DataFrameException -> [(a, Int)])
-> DataFrameException -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ TypeErrorContext a a -> DataFrameException
forall a b.
(Typeable a, Typeable b) =>
TypeErrorContext a b -> DataFrameException
TypeMismatchException (MkTypeErrorContext
                                                          { userType :: Either String (TypeRep a)
userType = TypeRep a -> Either String (TypeRep a)
forall a b. b -> Either a b
Right (TypeRep a -> Either String (TypeRep a))
-> TypeRep a -> Either String (TypeRep a)
forall a b. (a -> b) -> a -> b
$ forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a
                                                          , expectedType :: Either String (TypeRep a)
expectedType = TypeRep a -> Either String (TypeRep a)
forall a b. b -> Either a b
Right (TypeRep a -> Either String (TypeRep a))
-> TypeRep a -> Either String (TypeRep a)
forall a b. (a -> b) -> a -> b
$ forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @c
                                                          , errorColumnName :: Maybe String
errorColumnName = String -> Maybe String
forall a. a -> Maybe a
Just (Text -> String
T.unpack Text
columnName)
                                                          , callingFunctionName :: Maybe String
callingFunctionName = String -> Maybe String
forall a. a -> Maybe a
Just String
"valueCounts"
                                                          })
          Just a :~: a
Refl -> Map a Int -> [(a, Int)]
forall k a. Map k a -> [(k, a)]
M.toAscList Map a Int
Map a Int
column

-- | Thread a DataFrame through a list of inputs, from left to right.
--
-- Starting from @acc@, every element @x@ of @xs@ is turned into a
-- transformation @f x@ and applied to the frame produced so far. The
-- traversal is a strict left fold ('L.foldl''), so the intermediate frame
-- is forced at each step. An empty list returns @acc@ unchanged.
fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame
fold f xs acc = L.foldl' step acc xs
  where
    -- 'L.foldl'' hands us the accumulator first; @f@ wants the element first.
    step frame x = f x frame