{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Core where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Map.Strict as MS
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU

import Control.Exception (throw)
import Data.Either
import qualified Data.Foldable as Fold
import Data.Function (on, (&))
import Data.Maybe
import Data.Type.Equality (TestEquality (..))
import DataFrame.Errors
import DataFrame.Internal.Column (
    Column (..),
    Columnable,
    TypedColumn (..),
    columnLength,
    columnTypeString,
    expandColumn,
    fromList,
    fromVector,
    toDoubleVector,
    toFloatVector,
    toIntVector,
    toUnboxedVector,
    toVector,
 )
import DataFrame.Internal.DataFrame (
    DataFrame (..),
    columnIndices,
    empty,
    getColumn,
 )
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Internal.Parsing (isNullish)
import DataFrame.Internal.Row (Any, mkColumnFromRow)
import Type.Reflection
import Prelude hiding (null)

{- | O(1) Get DataFrame dimensions i.e. (rows, columns)

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.dimensions df

(100, 3)
@
-}
dimensions :: DataFrame -> (Int, Int)
-- Plain projection of the dimensions field stored on the dataframe record.
dimensions = dataframeDimensions
{-# INLINE dimensions #-}

{- | O(1) Get number of rows in a dataframe.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.nRows df
100
@
-}
nRows :: DataFrame -> Int
-- The row count is the first component of the stored (rows, columns) pair.
nRows df = fst (dataframeDimensions df)

{- | O(1) Get number of columns in a dataframe.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.nColumns df
3
@
-}
nColumns :: DataFrame -> Int
-- The column count is the second component of the stored (rows, columns) pair.
nColumns df = snd (dataframeDimensions df)

{- | O(k) Get column names of the DataFrame in order of insertion.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.columnNames df

["a", "b", "c"]
@
-}
columnNames :: DataFrame -> [T.Text]
-- 'columnIndices' maps each name to its position, and 'M.toList' yields the
-- pairs ordered by name, so they are re-sorted by position before the
-- positions are dropped.
columnNames = map fst . L.sortOn snd . M.toList . columnIndices
{-# INLINE columnNames #-}

{- | Adds a vector to the dataframe. If the vector has fewer elements than the dataframe and the dataframe is not empty,
the vector is converted to type `Maybe a` and filled with `Nothing` to match the size of the dataframe. Similarly,
if the vector has more elements than what's currently in the dataframe, the other columns in the dataframe are
changed to `Maybe <Type>` and filled with `Nothing`.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> D.insertVector "numbers" (V.fromList [(1 :: Int)..10]) D.empty

--------
 numbers
--------
   Int
--------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

@
-}
insertVector ::
    forall a.
    (Columnable a) =>
    -- | Column Name
    T.Text ->
    -- | Vector to add to column
    V.Vector a ->
    -- | DataFrame to add column to
    DataFrame ->
    DataFrame
-- Wrap the vector as a 'Column' and delegate to 'insertColumn', which is
-- where any length reconciliation with the existing columns happens.
insertVector name xs df = insertColumn name (fromVector xs) df
{-# INLINE insertVector #-}

{- | Adds a foldable collection to the dataframe. If the collection has fewer elements than the
dataframe and the dataframe is not empty,
the collection is converted to type `Maybe a` and filled with `Nothing` to match the size of the dataframe. Similarly,
if the collection has more elements than what's currently in the dataframe, the other columns in the dataframe are
changed to `Maybe <Type>` and filled with `Nothing`.

Be careful not to insert infinite collections with this function as that will crash the program.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> D.insert "numbers" [(1 :: Int)..10] D.empty

--------
 numbers
--------
   Int
--------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

@
-}
insert ::
    forall a t.
    (Columnable a, Foldable t) =>
    -- | Column Name
    T.Text ->
    -- | Sequence to add to dataframe
    t a ->
    -- | DataFrame to add column to
    DataFrame ->
    DataFrame
-- The container is first flattened to a list and then turned into a
-- 'Column' before being handed to 'insertColumn'.
-- TODO: Do reflection on container type so we can sometimes avoid the list construction.
insert name xs df = insertColumn name (fromList items) df
  where
    items = Fold.foldr' (:) [] xs
{-# INLINE insert #-}

{- | Adds a vector to the dataframe and pads it with a default value if it has fewer elements than the number of rows.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified Data.Vector as V
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("x", D.fromList [(1 :: Int)..10])]
>>> D.insertVectorWithDefault 0 "numbers" (V.fromList [(1 :: Int),2,3]) df

-------------
 x  | numbers
----|--------
Int |   Int
----|--------
1   | 1
2   | 2
3   | 3
4   | 0
5   | 0
6   | 0
7   | 0
8   | 0
9   | 0
10  | 0

@
-}
insertVectorWithDefault ::
    forall a.
    (Columnable a) =>
    -- | Default Value
    a ->
    -- | Column name
    T.Text ->
    -- | Data to add to column
    V.Vector a ->
    -- | DataFrame to add the column to
    DataFrame ->
    DataFrame
insertVectorWithDefault defaultValue name xs d =
    insertColumn name (fromVector padded) d
  where
    (rows, _) = dataframeDimensions d
    -- Number of default values to append; clamped at zero so a vector that
    -- is already at least as long as the dataframe is inserted unchanged.
    missing = max 0 (rows - V.length xs)
    padded = xs V.++ V.replicate missing defaultValue

{- | Adds a list to the dataframe and pads it with a default value if it has fewer elements than the number of rows.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("x", D.fromList [(1 :: Int)..10])]
>>> D.insertWithDefault 0 "numbers" [(1 :: Int),2,3] df

-------------
 x  | numbers
----|--------
Int |   Int
----|--------
1   | 1
2   | 2
3   | 3
4   | 0
5   | 0
6   | 0
7   | 0
8   | 0
9   | 0
10  | 0

@
-}
insertWithDefault ::
    forall a t.
    (Columnable a, Foldable t) =>
    -- | Default Value
    a ->
    -- | Column name
    T.Text ->
    -- | Data to add to column
    t a ->
    -- | DataFrame to add the column to
    DataFrame ->
    DataFrame
insertWithDefault defaultValue name xs d =
    insertColumn name (fromList (items ++ padding)) d
  where
    (rows, _) = dataframeDimensions d
    -- Flatten the container once; the list is needed both for its length
    -- and as the prefix of the final column.
    items = Fold.foldr' (:) [] xs
    -- Clamped at zero so input that is already long enough gets no padding.
    padding = replicate (max 0 (rows - length items)) defaultValue

{- | /O(n)/ Adds an unboxed vector to the dataframe.

Same as insertVector but takes an unboxed vector. If you insert a vector of numbers through insertVector it will be converted
into an unboxed vector anyway, so this function saves that extra work/conversion.
-}
insertUnboxedVector ::
    forall a.
    (Columnable a, VU.Unbox a) =>
    -- | Column Name
    T.Text ->
    -- | Unboxed vector to add to column
    VU.Vector a ->
    -- | DataFrame to add the column to
    DataFrame ->
    DataFrame
-- The vector is wrapped directly in the 'UnboxedColumn' constructor, so no
-- conversion step is involved before 'insertColumn' takes over.
insertUnboxedVector name xs df = insertColumn name (UnboxedColumn xs) df

{- | /O(n)/ Add a column to the dataframe.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> D.insertColumn "numbers" (D.fromList [(1 :: Int)..10]) D.empty

--------
 numbers
--------
   Int
--------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

@
-}
insertColumn ::
    -- | Column Name
    T.Text ->
    -- | Column to add
    Column ->
    -- | DataFrame to add the column to
    DataFrame ->
    DataFrame
insertColumn name column d =
    case M.lookup name (columnIndices d) of
        -- The name is already present: overwrite the stored column in
        -- place, keeping the name -> index map and the column count as is.
        Just i ->
            DataFrame
                (resized (columns d V.// [(i, column)]))
                (columnIndices d)
                (n, c)
                M.empty
        -- New name: append the column and register it under the position
        -- it actually lands on. 'V.snoc' places it at the current length of
        -- the column vector, so that is the index recorded (rather than the
        -- column count, which only agrees with it while every stored column
        -- is named).
        Nothing ->
            DataFrame
                (resized (columns d `V.snoc` column))
                (M.insert name (V.length (columns d)) (columnIndices d))
                (n, c + 1)
                M.empty
  where
    (r, c) = dataframeDimensions d
    -- The resulting row count is the larger of the current row count and
    -- the length of the incoming column.
    n = max (columnLength column) r
    -- Every column (including the new one) is passed through
    -- 'expandColumn' so that all columns end up with 'n' rows.
    resized = V.map (expandColumn n)
    -- NOTE(review): in both branches the fourth 'DataFrame' field (a
    -- 'Map Text UExpr') is reset to empty; presumably intentional — confirm.

{- | /O(n)/ Clones a column and places it under a new name in the dataframe.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified Data.Vector as V
>>> df = D.insertVector "numbers" (V.fromList [1..10]) D.empty
>>> D.cloneColumn "numbers" "others" df

-----------------
 numbers | others
---------|-------
   Int   |  Int
---------|-------
 1       | 1
 2       | 2
 3       | 3
 4       | 4
 5       | 5
 6       | 6
 7       | 7
 8       | 8
 9       | 9
 10      | 10

@
-}
cloneColumn :: T.Text -> T.Text -> DataFrame -> DataFrame
cloneColumn original new df =
    case getColumn original df of
        -- The copy goes through 'insertColumn', so an existing column
        -- already called @new@ is overwritten rather than duplicated.
        Just column -> insertColumn new column df
        -- Unknown source column: raise 'ColumnNotFoundException', passing
        -- along the names that do exist.
        Nothing ->
            throw $
                ColumnNotFoundException
                    original
                    "cloneColumn"
                    (M.keys (columnIndices df))

{- | /O(n)/ Renames a single column.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "numbers" (V.fromList [1..10]) D.empty
>>> D.rename "numbers" "others" df

-------
 others
-------
  Int
-------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

@
-}
rename :: T.Text -> T.Text -> DataFrame -> DataFrame
-- Throwing variant of 'renameSafe': a 'Left' is re-raised as an exception,
-- a 'Right' is returned unchanged.
rename orig new df =
    case renameSafe orig new df of
        Left err -> throw err
        Right renamed -> renamed

{- | /O(n)/ Renames many columns.

==== __Example__
@
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)
>>> df

-----------------
 numbers | others
---------|-------
   Int   |  Int
---------|-------
 1       | 11
 2       | 12
 3       | 13
 4       | 14
 5       | 15
 6       | 16
 7       | 17
 8       | 18
 9       | 19
 10      | 20

>>> D.renameMany [("numbers", "first_10"), ("others", "next_10")] df

-------------------
 first_10 | next_10
----------|--------
   Int    |   Int
----------|--------
 1        | 11
 2        | 12
 3        | 13
 4        | 14
 5        | 15
 6        | 16
 7        | 17
 8        | 18
 9        | 19
 10       | 20

@
-}
renameMany :: [(T.Text, T.Text)] -> DataFrame -> DataFrame
-- Each (old, new) pair is applied with 'rename' via the dataframe 'fold'
-- helper, so a missing column throws exactly as a single 'rename' would.
renameMany = fold renamePair
  where
    renamePair (old, new) = rename old new

{- | Renames a single column without throwing.

Returns 'Left' with a 'ColumnNotFoundException' (carrying the existing column
names) when @orig@ is not a column of the dataframe; otherwise returns 'Right'
with the dataframe whose name index maps @new@ to the position previously held
by @orig@. Only the name index is touched; the stored columns are left as is.
-}
renameSafe ::
    T.Text -> T.Text -> DataFrame -> Either DataFrameException DataFrame
renameSafe orig new df =
    case M.lookup orig indices of
        Nothing ->
            Left (ColumnNotFoundException orig "rename" (M.keys indices))
        Just columnIndex ->
            -- Drop the old name first, then bind the new name to the same
            -- position.
            -- NOTE(review): if @new@ already names a different column its
            -- index entry is overwritten here — confirm that is intended.
            Right df{columnIndices = M.insert new columnIndex (M.delete orig indices)}
  where
    indices = columnIndices df

-- | One summary row per column, as assembled and rendered by 'describeColumns'.
data ColumnInfo = ColumnInfo
    { nameOfColumn :: !T.Text
    -- ^ Name of the column (shown under "Column Name").
    , nonNullValues :: !Int
    -- ^ Count of values not considered null (shown under "# Non-null Values").
    , nullValues :: !Int
    -- ^ Count of values considered null (shown under "# Null Values").
    , typeOfColumn :: !T.Text
    -- ^ Rendered element type of the column (shown under "Type").
    }

{- | O(n * k ^ 2) Returns, for each column in the dataframe, the number of non-null values, the number of null values and the type of the column.

==== __Example__
@
>>> import qualified Data.Vector as V
>>> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)
>>> D.describeColumns df

--------------------------------------------------------
 Column Name | # Non-null Values | # Null Values | Type
-------------|-------------------|---------------|-----
    Text     |        Int        |      Int      | Text
-------------|-------------------|---------------|-----
 others      | 10                | 0             | Int
 numbers     | 10                | 0             | Int

@
-}
describeColumns :: DataFrame -> DataFrame
describeColumns df =
    empty
        & insertColumn "Column Name" (fromList (map nameOfColumn infos))
        & insertColumn "# Non-null Values" (fromList (map nonNullValues infos))
        & insertColumn "# Null Values" (fromList (map nullValues infos))
        & insertColumn "Type" (fromList (map typeOfColumn infos))
  where
    -- One entry per named column, ordered by ascending non-null count.
    infos :: [ColumnInfo]
    infos = L.sortBy (compare `on` nonNullValues) (V.ifoldl' go [] (columns df))

    -- Inverse of 'columnIndices': position in the column vector -> name.
    indexMap :: M.Map Int T.Text
    indexMap =
        M.fromList [(idx, name) | (name, idx) <- M.toList (columnIndices df)]

    -- Per-constructor dispatch: decides how nulls are counted and how the
    -- element type is rendered, then defers to 'record'.
    go :: [ColumnInfo] -> Int -> Column -> [ColumnInfo]
    go acc i col@(OptionalColumn (_ :: V.Vector a)) =
        record acc i col (nulls col) (T.pack (show (typeRep @a)))
    -- NOTE(review): boxed columns are always reported with zero nulls here,
    -- even though 'nulls' can return a non-zero count for some boxed
    -- element types — confirm this is intended.
    go acc i col@(BoxedColumn (_ :: V.Vector a)) =
        record acc i col 0 (T.pack (show (typeRep @a)))
    -- Unboxed columns cannot have nulls since Maybe
    -- is not an instance of Unbox a
    go acc i col@(UnboxedColumn _) =
        record acc i col 0 (T.pack (columnTypeString col))

    -- Prepends the summary for one column; a column whose position has no
    -- name in 'indexMap' is skipped.
    record :: [ColumnInfo] -> Int -> Column -> Int -> T.Text -> [ColumnInfo]
    record acc i col nullCount columnType =
        case M.lookup i indexMap of
            Nothing -> acc
            Just cname ->
                ColumnInfo cname (columnLength col - nullCount) nullCount columnType
                    : acc

{- | Counts the values in a column that are treated as null.

* Optional columns: the number of 'Nothing' entries.
* Boxed columns of 'T.Text': entries for which 'isNullish' holds.
* Boxed columns of 'String': entries for which 'isNullish' holds after packing to 'T.Text'.
* Boxed columns whose element type is @Maybe x@: the number of 'Nothing' entries.
* Anything else: 0.
-}
nulls :: Column -> Int
nulls (OptionalColumn xs) = VG.length (VG.filter isNothing xs)
nulls (BoxedColumn (xs :: V.Vector a))
    -- The element type is only known at runtime, so it is inspected through
    -- its 'TypeRep'; each successful guard refines @a@ for its right-hand side.
    | Just Refl <- testEquality (typeRep @a) (typeRep @T.Text) =
        VG.length (VG.filter isNullish xs)
    | Just Refl <- testEquality (typeRep @a) (typeRep @String) =
        VG.length (VG.filter (isNullish . T.pack) xs)
    | App tycon _ <- typeRep @a
    , Just HRefl <- eqTypeRep tycon (typeRep @Maybe) =
        VG.length (VG.filter isNothing xs)
    | otherwise = 0
nulls _ = 0

{- | Counts the 'Left' entries of a boxed column whose element type is an
@Either x y@. Every other column yields 0.
-}
partiallyParsed :: Column -> Int
partiallyParsed (BoxedColumn (xs :: V.Vector a))
    -- Match the runtime type against @Either _ _@; when the guards fail the
    -- catch-all equation below applies.
    | App (App tycon _) _ <- typeRep @a
    , Just HRefl <- eqTypeRep tycon (typeRep @Either) =
        VG.length (VG.filter isLeft xs)
partiallyParsed _ = 0

{- | Builds a dataframe from @(name, column)@ pairs.

Starting from the 'empty' dataframe, each pair is added with 'insertColumn',
working through the list from left to right.

==== __Example__
@
>>> df = D.fromNamedColumns [("numbers", D.fromList [1..10]), ("others", D.fromList [11..20])]
>>> df
-----------------
 numbers | others
---------|-------
   Int   |  Int
---------|-------
 1       | 11
 2       | 12
 3       | 13
 4       | 14
 5       | 15
 6       | 16
 7       | 17
 8       | 18
 9       | 19
 10      | 20

@
-}
fromNamedColumns :: [(T.Text, Column)] -> DataFrame
fromNamedColumns namedColumns = L.foldl' addColumn empty namedColumns
  where
    addColumn frame (name, column) = insertColumn name column frame

{- | Builds a dataframe from a list of columns, naming them by position:
@"0"@, @"1"@, and so on.

Handy for quick exploration; consider renaming the columns afterwards or
dropping the ones you do not need.

==== __Example__
@
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]
>>> df
-----------------
  0  |  1
-----|----
 Int | Int
-----|----
 1   | 11
 2   | 12
 3   | 13
 4   | 14
 5   | 15
 6   | 16
 7   | 17
 8   | 18
 9   | 19
 10  | 20

@
-}
fromUnnamedColumns :: [Column] -> DataFrame
fromUnnamedColumns cols = fromNamedColumns (zip positionalNames cols)
  where
    -- Infinite supply of names; 'zip' stops at the last column.
    positionalNames = [T.pack (show position) | position <- [0 :: Integer ..]]

{- | Create a dataframe from a list of column names and rows.

The column at position @i@ of the name list is built by calling
'mkColumnFromRow' with index @i@ and all of the rows, and is then added
with 'insertColumn'. An empty name list yields the 'empty' dataframe.

==== __Example__
@
>>> df = D.fromRows ["A", "B"] [[D.toAny 1, D.toAny 11], [D.toAny 2, D.toAny 12], [D.toAny 3, D.toAny 13]]

>>> df

----------
  A  |  B
-----|----
 Int | Int
-----|----
 1   | 11
 2   | 12
 3   | 13

@
-}
fromRows :: [T.Text] -> [[Any]] -> DataFrame
fromRows names rows = L.foldl' addColumn empty (zip [0 ..] names)
  where
    -- Pairing every name with its position avoids the partial, linear-time
    -- @names !! i@ lookup for each column.
    addColumn df (i, name) = insertColumn name (mkColumnFromRow i rows) df

{- | Counts the occurrences of each value in a given column.

The column is folded once into an ordered map, so for @n@ rows and @k@
distinct values this performs O(n log k) key comparisons. The result is
sorted by value in ascending order.

==== __Example__
@
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]

>>> D.valueCounts @Int "0" df

[(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1)]

@

==== __Throws__

* 'DataFrameException' - whatever 'columnAsVector' reports as a 'Left'.
-}
valueCounts ::
    forall a. (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Int)]
valueCounts expr df = either throw tally (columnAsVector expr df)
  where
    tally :: V.Vector a -> [(a, Int)]
    tally = M.toAscList . V.foldl' bump M.empty

    -- Add one to the running count of the value just seen.
    bump :: M.Map a Int -> a -> M.Map a Int
    bump seen value = MS.insertWith (+) value 1 seen

{- | Shows the proportion of rows taken up by each value in a given column.

Every distinct value is paired with its count divided by the total number
of counted rows. The result is sorted by value in ascending order, and an
empty column gives an empty list.

==== __Example__
@
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]

>>> D.valueProportions @Int "0" df

[(1,0.1),(2,0.1),(3,0.1),(4,0.1),(5,0.1),(6,0.1),(7,0.1),(8,0.1),(9,0.1),(10,0.1)]

@

==== __Throws__

* 'DataFrameException' - whatever 'columnAsVector' reports as a 'Left'.
-}
valueProportions ::
    forall a. (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Double)]
valueProportions expr df = either throw proportions (columnAsVector expr df)
  where
    proportions :: V.Vector a -> [(a, Double)]
    proportions values =
        let
            histogram :: M.Map a Int
            histogram =
                V.foldl' (\seen value -> MS.insertWith (+) value 1 seen) M.empty values
            -- Sum of all counts, i.e. the number of rows that were tallied.
            total :: Double
            total = fromIntegral (sum histogram)
         in
            [ (value, fromIntegral occurrences / total)
            | (value, occurrences) <- M.toAscList histogram
            ]

{- | A strict left fold over a list of arguments that takes the dataframe
last, which makes it easy to chain with other dataframe operations.

Each list element is applied, in order, to the dataframe produced by the
previous step.

==== __Example__
@
>>> df = D.fromNamedColumns [("x", D.fromList [1..100]), ("y", D.fromList [11..110])]
>>> D.fold D.dropLast [1..5] df

---------
 x  |  y
----|----
Int | Int
----|----
1   | 11
2   | 12
3   | 13
4   | 14
5   | 15
6   | 16
7   | 17
8   | 18
9   | 19
10  | 20
11  | 21
12  | 22
13  | 23
14  | 24
15  | 25
16  | 26
17  | 27
18  | 28
19  | 29
20  | 30

Showing 20 rows out of 85

@
-}
fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame
fold f xs acc = L.foldl' step acc xs
  where
    step frame x = f x frame

{- | Returns a dataframe as a two dimensional vector of floats.

Every column is converted with 'toFloatVector' and the converted columns
are then transposed, so the outer vector holds one unboxed vector per row.
The number of rows is taken from 'dataframeDimensions'.

This is useful for handing data over into ML systems.

Returns 'Left' with the error of the first column (in column order) that
cannot be converted to floats.
-}
toFloatMatrix ::
    DataFrame -> Either DataFrameException (V.Vector (VU.Vector Float))
toFloatMatrix df = toRows <$> traverse toFloatVector (columns df)
  where
    rowCount = fst (dataframeDimensions df)
    -- Build each row at its final size instead of growing it with repeated
    -- 'VU.snoc' calls, which copy the row for every element appended.
    toRows cols =
        V.generate rowCount $ \i ->
            VU.generate (V.length cols) $ \j -> (cols VG.! j) VG.! i

{- | Returns a dataframe as a two dimensional vector of doubles.

Every column is converted with 'toDoubleVector' and the converted columns
are then transposed, so the outer vector holds one unboxed vector per row.
The number of rows is taken from 'dataframeDimensions'.

This is useful for handing data over into ML systems.

Returns 'Left' with the error of the first column (in column order) that
cannot be converted to doubles.
-}
toDoubleMatrix ::
    DataFrame -> Either DataFrameException (V.Vector (VU.Vector Double))
toDoubleMatrix df = toRows <$> traverse toDoubleVector (columns df)
  where
    rowCount = fst (dataframeDimensions df)
    -- Build each row at its final size instead of growing it with repeated
    -- 'VU.snoc' calls, which copy the row for every element appended.
    toRows cols =
        V.generate rowCount $ \i ->
            VU.generate (V.length cols) $ \j -> (cols VG.! j) VG.! i

{- | Returns a dataframe as a two dimensional vector of ints.

Every column is converted with 'toIntVector' and the converted columns are
then transposed, so the outer vector holds one unboxed vector per row.
The number of rows is taken from 'dataframeDimensions'.

This is useful for handing data over into ML systems.

Returns 'Left' with the error of the first column (in column order) that
cannot be converted to ints.
-}
toIntMatrix :: DataFrame -> Either DataFrameException (V.Vector (VU.Vector Int))
toIntMatrix df = toRows <$> traverse toIntVector (columns df)
  where
    rowCount = fst (dataframeDimensions df)
    -- Build each row at its final size instead of growing it with repeated
    -- 'VU.snoc' calls, which copy the row for every element appended.
    toRows cols =
        V.generate rowCount $ \i ->
            VU.generate (V.length cols) $ \j -> (cols VG.! j) VG.! i

{- | Get a specific column as a vector.

You must specify the type via type applications.

A plain column reference is looked up by name with 'getColumn'; any other
expression is first evaluated with 'interpret'. In both cases the resulting
column is converted with 'toVector'.

Returns 'Left' when the named column does not exist
('ColumnNotFoundException'), when 'interpret' fails, or when 'toVector'
reports an error.

==== __Examples__

>>> columnAsVector (F.col @Int "age") df
Right [25, 30, 35, ...]

>>> columnAsVector (F.col @Text "name") df
Right ["Alice", "Bob", "Charlie", ...]
-}
columnAsVector ::
    forall a.
    (Columnable a) => Expr a -> DataFrame -> Either DataFrameException (V.Vector a)
columnAsVector (Col name) df = case getColumn name df of
    Just col -> toVector col
    Nothing ->
        Left $ ColumnNotFoundException name "columnAsVector" (M.keys $ columnIndices df)
columnAsVector expr df = case interpret df expr of
    -- Report interpreter failures through the 'Either' result rather than
    -- throwing from a function whose type promises a 'Left'.
    Left e -> Left e
    Right (TColumn col) -> toVector col

{- | Retrieves a column as an unboxed vector of 'Int' values.

A plain column reference is looked up by name with 'getColumn'; any other
expression is first evaluated with 'interpret'. The resulting column is
converted with 'toIntVector'.

Returns 'Left' with a 'DataFrameException' when the named column does not
exist, when 'interpret' fails, or when 'toIntVector' cannot convert the
column to ints.
-}
columnAsIntVector ::
    (Columnable a, Num a) =>
    Expr a -> DataFrame -> Either DataFrameException (VU.Vector Int)
columnAsIntVector (Col name) df = case getColumn name df of
    Just col -> toIntVector col
    Nothing ->
        Left $
            ColumnNotFoundException name "columnAsIntVector" (M.keys $ columnIndices df)
columnAsIntVector expr df = case interpret df expr of
    -- Report interpreter failures through the 'Either' result rather than
    -- throwing from a function whose type promises a 'Left'.
    Left e -> Left e
    Right (TColumn col) -> toIntVector col

{- | Retrieves a column as an unboxed vector of 'Double' values.

A plain column reference is looked up by name with 'getColumn'; any other
expression is first evaluated with 'interpret'. The resulting column is
converted with 'toDoubleVector'.

Returns 'Left' with a 'DataFrameException' when the named column does not
exist, when 'interpret' fails, or when 'toDoubleVector' cannot convert the
column to doubles.
-}
columnAsDoubleVector ::
    (Columnable a, Num a) =>
    Expr a -> DataFrame -> Either DataFrameException (VU.Vector Double)
columnAsDoubleVector (Col name) df = case getColumn name df of
    Just col -> toDoubleVector col
    Nothing ->
        Left $
            ColumnNotFoundException name "columnAsDoubleVector" (M.keys $ columnIndices df)
columnAsDoubleVector expr df = case interpret df expr of
    -- Report interpreter failures through the 'Either' result rather than
    -- throwing from a function whose type promises a 'Left'.
    Left e -> Left e
    Right (TColumn col) -> toDoubleVector col

{- | Retrieves a column as an unboxed vector of 'Float' values.

A plain column reference is looked up by name with 'getColumn'; any other
expression is first evaluated with 'interpret'. The resulting column is
converted with 'toFloatVector'.

Returns 'Left' with a 'DataFrameException' when the named column does not
exist, when 'interpret' fails, or when 'toFloatVector' cannot convert the
column to floats.
-}
columnAsFloatVector ::
    (Columnable a, Num a) =>
    Expr a -> DataFrame -> Either DataFrameException (VU.Vector Float)
columnAsFloatVector (Col name) df = case getColumn name df of
    Just col -> toFloatVector col
    Nothing ->
        Left $
            ColumnNotFoundException name "columnAsFloatVector" (M.keys $ columnIndices df)
columnAsFloatVector expr df = case interpret df expr of
    -- Report interpreter failures through the 'Either' result rather than
    -- throwing from a function whose type promises a 'Left'.
    Left e -> Left e
    Right (TColumn col) -> toFloatVector col

{- | Retrieves a column as an unboxed vector of its element type.

A plain column reference is looked up by name with 'getColumn'; any other
expression is first evaluated with 'interpret'. The resulting column is
converted with 'toUnboxedVector'.

Returns 'Left' with a 'DataFrameException' when the named column does not
exist, when 'interpret' fails, or when 'toUnboxedVector' reports an error.
-}
columnAsUnboxedVector ::
    forall a.
    (Columnable a, VU.Unbox a) =>
    Expr a -> DataFrame -> Either DataFrameException (VU.Vector a)
columnAsUnboxedVector (Col name) df = case getColumn name df of
    Just col -> toUnboxedVector col
    Nothing ->
        Left $
            -- Name this function, not a sibling, as the caller in the error.
            ColumnNotFoundException name "columnAsUnboxedVector" (M.keys $ columnIndices df)
columnAsUnboxedVector expr df = case interpret df expr of
    -- Report interpreter failures through the 'Either' result rather than
    -- throwing from a function whose type promises a 'Left'.
    Left e -> Left e
    Right (TColumn col) -> toUnboxedVector col
{-# SPECIALIZE columnAsUnboxedVector ::
    Expr Double -> DataFrame -> Either DataFrameException (VU.Vector Double)
    #-}
{-# INLINE columnAsUnboxedVector #-}

{- | Get a specific column as a list.

You must specify the type via type applications. The column is fetched with
'columnAsVector' and then converted to a list.

==== __Examples__

>>> columnAsList @Int "age" df
[25, 30, 35, ...]

>>> columnAsList @Text "name" df
["Alice", "Bob", "Charlie", ...]

==== __Throws__

* 'DataFrameException' - whenever 'columnAsVector' yields a 'Left'; the
  exception is raised with 'throw' instead of being returned.
-}
columnAsList :: forall a. (Columnable a) => Expr a -> DataFrame -> [a]
columnAsList expr df = case columnAsVector expr df of
    Left err -> throw err
    Right values -> V.toList values