{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Internal.DataFrame where

import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU

import Control.Exception (throw)
import Data.Function (on)
import Data.List (sortBy, transpose, (\\))
import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl))
import DataFrame.Display.Terminal.PrettyPrint
import DataFrame.Errors
import DataFrame.Internal.Column
import Text.Printf
import Type.Reflection (typeRep)

{- | A table of named columns.

The column data lives in 'columns'; 'columnIndices' records, for every column
name, the position of that column inside the vector.
-}
data DataFrame = DataFrame
    { columns :: V.Vector Column
    -- ^ The data itself: a dataframe is stored as a vector of columns.
    , columnIndices :: M.Map T.Text Int
    {- ^ Maps each column name to the position of its data in 'columns'.
    Sorting the entries by position recovers the column order.
    -}
    , dataframeDimensions :: (Int, Int)
    -- ^ (rows, columns)
    }

{- | A record that contains information about how and what
rows are grouped in the dataframe. This can only be used with
@aggregate@.
-}
data GroupedDataFrame = Grouped
    { fullDataframe :: DataFrame
    -- ^ The underlying dataframe.
    , groupedColumns :: [T.Text]
    {- ^ Names of the key columns (the 'Show' instance prints these under
    the @keyColumns@ label).
    -}
    , valueIndices :: VU.Vector Int
    -- ^ NOTE(review): presumably row indices arranged group by group — confirm against the grouping code.
    , offsets :: VU.Vector Int
    -- ^ NOTE(review): presumably the group boundaries within 'valueIndices' — confirm against the grouping code.
    }

{- | One-line summary of a grouping: the key column names (labelled
@keyColumns@) followed by every other column name of the underlying
dataframe (labelled @groupedColumns@).
-}
instance Show GroupedDataFrame where
    show :: GroupedDataFrame -> String
    -- The index and offset vectors are not part of the rendering.
    show (Grouped df cols _ _) =
        printf
            "{ keyColumns: %s groupedColumns: %s }"
            (show cols)
            (show (M.keys (columnIndices df) \\ cols))

{- | Two groupings are equal when their underlying dataframes are equal and
they have the same key columns. 'valueIndices' and 'offsets' are not
consulted.
-}
instance Eq GroupedDataFrame where
    (==) :: GroupedDataFrame -> GroupedDataFrame -> Bool
    Grouped df cols _ _ == Grouped df' cols' _ _ =
        df == df' && cols == cols'

{- | Two dataframes are equal when they have the same column names and every
name refers to equal column data in both. Columns are matched by name, so
their positions inside 'columns' may differ. 'dataframeDimensions' is not
compared.
-}
instance Eq DataFrame where
    (==) :: DataFrame -> DataFrame -> Bool
    a == b =
        M.keys (columnIndices a) == M.keys (columnIndices b)
            && all sameColumn (M.toList (columnIndices a))
      where
        -- Look the name up in @b@ rather than reusing @a@'s position: the
        -- two frames may store the same column at different indices.
        sameColumn (name, index) =
            columns a V.!? index
                == (M.lookup name (columnIndices b) >>= (columns b V.!?))

{- | Renders a preview of the dataframe as a plain (non-markdown) text table
followed by a @Showing n rows out of m@ footer. Every column is cut down
with 'takeColumn' to the preview limit before rendering.
-}
instance Show DataFrame where
    show :: DataFrame -> String
    show d =
        T.unpack (asText preview False)
            ++ "\n"
            ++ "Showing "
            ++ show shownRows
            ++ " rows out of "
            ++ show totalRows
      where
        -- Maximum number of rows included in the preview.
        previewLimit :: Int
        previewLimit = 10
        (totalRows, columnCount) = dataframeDimensions d
        shownRows = min previewLimit totalRows
        -- Copy of the frame truncated to the preview size.
        preview =
            d
                { columns = V.map (takeColumn previewLimit) (columns d)
                , dataframeDimensions = (shownRows, columnCount)
                }

{- | For showing the dataframe as markdown in notebooks.

Equivalent to 'asText' with the markdown flag set to 'True'.
-}
toMarkdownTable :: DataFrame -> T.Text
toMarkdownTable df = asText df True

{- | Renders a dataframe as a text table.

The header lists the column names ordered by their position in
'columnIndices', the second line lists each column's element type, and the
body holds every cell converted to text. The 'Bool' is handed unchanged to
'showTable' ('toMarkdownTable' passes 'True', the 'Show' instance 'False').
-}
asText :: DataFrame -> Bool -> T.Text
asText d properMarkdown = showTable properMarkdown header types rows
  where
    -- (name, position) pairs in positional order.
    orderedEntries = sortBy (compare `on` snd) (M.toList (columnIndices d))
    header = map fst orderedEntries
    types = V.toList $ V.filter (/= "") $ V.map getType (columns d)
    -- Textual name of the element type stored in a column.
    getType :: Column -> T.Text
    getType (BoxedColumn (_ :: V.Vector a)) = T.pack $ show (typeRep @a)
    getType (UnboxedColumn (_ :: VU.Vector a)) = T.pack $ show (typeRep @a)
    getType (OptionalColumn (_ :: V.Vector a)) = T.pack $ show (typeRep @a)
    -- Separate out cases dynamically so we don't end up making round trip string
    -- copies.
    get :: Maybe Column -> V.Vector T.Text
    get (Just (BoxedColumn (column :: V.Vector a))) =
        case testEquality (typeRep @a) (typeRep @T.Text) of
            -- Already text: reuse the vector as is.
            Just Refl -> column
            Nothing -> case testEquality (typeRep @a) (typeRep @String) of
                -- Strings are packed directly; going through 'show' would
                -- add quotes and escapes.
                Just Refl -> V.map T.pack column
                Nothing -> V.map (T.pack . show) column
    get (Just (UnboxedColumn column)) = V.map (T.pack . show) (V.convert column)
    get (Just (OptionalColumn column)) = V.map (T.pack . show) column
    -- A position with no column behind it contributes no cells.
    get Nothing = V.empty
    -- One list of cells per column, transposed into one list per row.
    rows =
        transpose $
            map (V.toList . get . (columns d V.!?) . snd) orderedEntries

-- | O(1) Creates an empty dataframe: no columns, no names and dimensions @(0, 0)@.
empty :: DataFrame
empty =
    DataFrame
        { columns = V.empty
        , columnIndices = M.empty
        , dataframeDimensions = (0, 0)
        }

{- | Safely retrieves a column by name from the dataframe.

Returns 'Nothing' if the name is not present in 'columnIndices', or if the
recorded position does not exist in 'columns'.

==== __Examples__

>>> getColumn "age" df
Just (UnboxedColumn ...)

>>> getColumn "nonexistent" df
Nothing
-}
getColumn :: T.Text -> DataFrame -> Maybe Column
getColumn name df = do
    -- Name -> position, then position -> column; either step may fail.
    i <- M.lookup name (columnIndices df)
    columns df V.!? i

{- | Retrieves a column by name from the dataframe, throwing an exception if not found.

This is an unsafe version of 'getColumn' that throws 'ColumnNotFoundException'
if the column does not exist. Use this when you are certain the column exists.

==== __Throws__

* 'ColumnNotFoundException' - if the column with the given name does not exist.
  The exception carries the requested name and the names that are available.
-}
unsafeGetColumn :: T.Text -> DataFrame -> Column
unsafeGetColumn name df = case getColumn name df of
    Just col -> col
    Nothing ->
        throw $ ColumnNotFoundException name "" (M.keys $ columnIndices df)

{- | Checks if the dataframe is empty (has no columns).

Returns 'True' if the dataframe has no columns, 'False' otherwise.
Note that a dataframe with columns but no rows is not considered null.
Only the 'columns' vector is inspected.
-}
null :: DataFrame -> Bool
null df = V.null (columns df)

{- | Returns a dataframe as a two dimensional vector of floats.

Converts all columns in the dataframe to float vectors and transposes them
into a row-major matrix representation: the outer vector has one entry per
row (the row count is taken from 'dataframeDimensions') and each inner
vector has one entry per column.

This is useful for handing data over into ML systems.

Returns 'Left' with the error of the first column that cannot be converted
to floats.
-}
toFloatMatrix ::
    DataFrame -> Either DataFrameException (V.Vector (VU.Vector Float))
toFloatMatrix df = do
    -- Column-major conversion; stops at the first failing column.
    cols <- traverse toFloatVector (columns df)
    let rowCount = fst (dataframeDimensions df)
        colCount = V.length cols
    -- Transpose: row i collects the i-th element of every column.
    pure $
        V.generate rowCount $ \i ->
            VU.generate colCount $ \j -> (cols VG.! j) VG.! i

{- | Returns a dataframe as a two dimensional vector of doubles.

Converts all columns in the dataframe to double vectors and transposes them
into a row-major matrix representation: the outer vector has one entry per
row (the row count is taken from 'dataframeDimensions') and each inner
vector has one entry per column.

This is useful for handing data over into ML systems.

Returns 'Left' with the error of the first column that cannot be converted
to doubles.
-}
toDoubleMatrix ::
    DataFrame -> Either DataFrameException (V.Vector (VU.Vector Double))
toDoubleMatrix df = do
    -- Column-major conversion; stops at the first failing column.
    cols <- traverse toDoubleVector (columns df)
    let rowCount = fst (dataframeDimensions df)
        colCount = V.length cols
    -- Transpose: row i collects the i-th element of every column.
    pure $
        V.generate rowCount $ \i ->
            VU.generate colCount $ \j -> (cols VG.! j) VG.! i

{- | Returns a dataframe as a two dimensional vector of ints.

Converts all columns in the dataframe to int vectors and transposes them
into a row-major matrix representation: the outer vector has one entry per
row (the row count is taken from 'dataframeDimensions') and each inner
vector has one entry per column.

This is useful for handing data over into ML systems.

Returns 'Left' with the error of the first column that cannot be converted
to ints.
-}
toIntMatrix :: DataFrame -> Either DataFrameException (V.Vector (VU.Vector Int))
toIntMatrix df = do
    -- Column-major conversion; stops at the first failing column.
    cols <- traverse toIntVector (columns df)
    let rowCount = fst (dataframeDimensions df)
        colCount = V.length cols
    -- Transpose: row i collects the i-th element of every column.
    pure $
        V.generate rowCount $ \i ->
            VU.generate colCount $ \j -> (cols VG.! j) VG.! i

{- | Get a specific column as a vector.

You must specify the type via type applications. The requested type has to
match the column's stored element type exactly; for an optional column that
is the @Maybe@ type.

==== __Examples__

>>> columnAsVector @Int "age" df
[25, 30, 35, ...]

>>> columnAsVector @Text "name" df
["Alice", "Bob", "Charlie", ...]

==== __Throws__

* 'error' - if the column type doesn't match the requested type
* 'ColumnNotFoundException' - if the column does not exist (raised by
  'unsafeGetColumn')
-}
columnAsVector :: forall a. (Columnable a) => T.Text -> DataFrame -> V.Vector a
columnAsVector name df = case unsafeGetColumn name df of
    BoxedColumn (col :: V.Vector b) ->
        case testEquality (typeRep @a) (typeRep @b) of
            Just Refl -> col
            Nothing -> mismatch (show (typeRep @b))
    OptionalColumn (col :: V.Vector b) ->
        case testEquality (typeRep @a) (typeRep @b) of
            Just Refl -> col
            Nothing -> mismatch (show (typeRep @b))
    UnboxedColumn (col :: VU.Vector b) ->
        case testEquality (typeRep @a) (typeRep @b) of
            -- Unboxed storage has to be converted into a boxed vector.
            Just Refl -> VG.convert col
            Nothing -> mismatch (show (typeRep @b))
  where
    -- Takes the stored type already rendered as a 'String' so that this
    -- helper stays monomorphic across the three branches.
    mismatch :: String -> V.Vector a
    mismatch stored =
        error $
            "Type error: column "
                ++ show name
                ++ " holds "
                ++ stored
                ++ " but "
                ++ show (typeRep @a)
                ++ " was requested"

{- | Retrieves a column as an unboxed vector of 'Int' values.

The conversion itself is delegated to 'toIntVector'.

Returns 'Left' with a 'DataFrameException' if the column cannot be converted
to ints, and 'Left' with a 'ColumnNotFoundException' if no column has the
given name.
-}
columnAsIntVector ::
    T.Text -> DataFrame -> Either DataFrameException (VU.Vector Int)
columnAsIntVector name df = case getColumn name df of
    Just col -> toIntVector col
    -- Report a missing column through the 'Either' instead of raising it.
    Nothing ->
        Left $ ColumnNotFoundException name "" (M.keys $ columnIndices df)

{- | Retrieves a column as an unboxed vector of 'Double' values.

The conversion itself is delegated to 'toDoubleVector'.

Returns 'Left' with a 'DataFrameException' if the column cannot be converted
to doubles, and 'Left' with a 'ColumnNotFoundException' if no column has the
given name.
-}
columnAsDoubleVector ::
    T.Text -> DataFrame -> Either DataFrameException (VU.Vector Double)
columnAsDoubleVector name df = case getColumn name df of
    Just col -> toDoubleVector col
    -- Report a missing column through the 'Either' instead of raising it.
    Nothing ->
        Left $ ColumnNotFoundException name "" (M.keys $ columnIndices df)

{- | Retrieves a column as an unboxed vector of 'Float' values.

The conversion itself is delegated to 'toFloatVector'.

Returns 'Left' with a 'DataFrameException' if the column cannot be converted
to floats, and 'Left' with a 'ColumnNotFoundException' if no column has the
given name.
-}
columnAsFloatVector ::
    T.Text -> DataFrame -> Either DataFrameException (VU.Vector Float)
columnAsFloatVector name df = case getColumn name df of
    Just col -> toFloatVector col
    -- Report a missing column through the 'Either' instead of raising it.
    Nothing ->
        Left $ ColumnNotFoundException name "" (M.keys $ columnIndices df)