{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE FlexibleContexts #-}
module DataFrame.Internal.DataFrame where
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Generic as VG
import Control.Monad (join)
import DataFrame.Display.Terminal.PrettyPrint
import DataFrame.Internal.Column
import Data.Function (on)
import Data.List (sortBy, transpose, (\\))
import Data.Maybe (isJust)
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Text.Printf
import Type.Reflection (typeRep)
data DataFrame = DataFrame
{
DataFrame -> Vector Column
columns :: V.Vector Column,
DataFrame -> Map Text Int
columnIndices :: M.Map T.Text Int,
DataFrame -> (Int, Int)
dataframeDimensions :: (Int, Int)
}
data GroupedDataFrame = Grouped {
GroupedDataFrame -> DataFrame
fullDataframe :: DataFrame,
GroupedDataFrame -> [Text]
groupedColumns :: [T.Text],
GroupedDataFrame -> Vector Int
valueIndices :: VU.Vector Int,
GroupedDataFrame -> Vector Int
offsets :: VU.Vector Int
}
instance Show GroupedDataFrame where
show :: GroupedDataFrame -> String
show (Grouped DataFrame
df [Text]
cols Vector Int
indices Vector Int
os) = String -> String -> ShowS
forall r. PrintfType r => String -> r
printf String
"{ keyColumns: %s groupedColumns: %s }"
([Text] -> String
forall a. Show a => a -> String
show [Text]
cols)
([Text] -> String
forall a. Show a => a -> String
show ((Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (DataFrame -> Map Text Int
columnIndices DataFrame
df)) [Text] -> [Text] -> [Text]
forall a. Eq a => [a] -> [a] -> [a]
\\ [Text]
cols))
instance Eq GroupedDataFrame where
== :: GroupedDataFrame -> GroupedDataFrame -> Bool
(==) (Grouped DataFrame
df [Text]
cols Vector Int
indices Vector Int
os) (Grouped DataFrame
df' [Text]
cols' Vector Int
indices' Vector Int
os') = (DataFrame
df DataFrame -> DataFrame -> Bool
forall a. Eq a => a -> a -> Bool
== DataFrame
df') Bool -> Bool -> Bool
&& ([Text]
cols [Text] -> [Text] -> Bool
forall a. Eq a => a -> a -> Bool
== [Text]
cols')
instance Eq DataFrame where
(==) :: DataFrame -> DataFrame -> Bool
DataFrame
a == :: DataFrame -> DataFrame -> Bool
== DataFrame
b = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
a) [Text] -> [Text] -> Bool
forall a. Eq a => a -> a -> Bool
== ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
b) Bool -> Bool -> Bool
&&
((Text, Int) -> Bool -> Bool) -> Bool -> [(Text, Int)] -> Bool
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(Text
name, Int
index) Bool
acc -> Bool
acc Bool -> Bool -> Bool
&& (DataFrame -> Vector Column
columns DataFrame
a Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? Int
index Maybe Column -> Maybe Column -> Bool
forall a. Eq a => a -> a -> Bool
== (DataFrame -> Vector Column
columns DataFrame
b Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? (DataFrame -> Map Text Int
columnIndices DataFrame
b Map Text Int -> Text -> Int
forall k a. Ord k => Map k a -> k -> a
M.! Text
name)))) Bool
True (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
a)
instance Show DataFrame where
show :: DataFrame -> String
show :: DataFrame -> String
show DataFrame
d = Text -> String
T.unpack (DataFrame -> Bool -> Text
asText DataFrame
d Bool
False)
asText :: DataFrame -> Bool -> T.Text
asText :: DataFrame -> Bool -> Text
asText DataFrame
d Bool
properMarkdown =
let header :: [Text]
header = Text
"index" Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
d))
types :: [Text]
types = Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList (Vector Text -> [Text]) -> Vector Text -> [Text]
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> Vector Text -> Vector Text
forall a. (a -> Bool) -> Vector a -> Vector a
V.filter (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"") (Vector Text -> Vector Text) -> Vector Text -> Vector Text
forall a b. (a -> b) -> a -> b
$ (Column -> Text) -> Vector Column -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map Column -> Text
getType (DataFrame -> Vector Column
columns DataFrame
d)
getType :: Column -> T.Text
getType :: Column -> Text
getType (BoxedColumn (Vector a
column :: V.Vector a)) = String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a)
getType (UnboxedColumn (Vector a
column :: VU.Vector a)) = String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a)
getType (OptionalColumn (Vector (Maybe a)
column :: V.Vector a)) = String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ TypeRep (Maybe a) -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a)
get :: Maybe Column -> V.Vector T.Text
get :: Maybe Column -> Vector Text
get (Just (BoxedColumn (Vector a
column :: V.Vector a))) = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
Just a :~: Text
Refl -> Vector a
Vector Text
column
Maybe (a :~: Text)
Nothing -> case TypeRep a -> TypeRep String -> Maybe (a :~: String)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @String) of
Just a :~: String
Refl -> (String -> Text) -> Vector String -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map String -> Text
T.pack Vector a
Vector String
column
Maybe (a :~: String)
Nothing -> (a -> Text) -> Vector a -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) Vector a
column
get (Just (UnboxedColumn Vector a
column)) = (a -> Text) -> Vector a -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) (Vector a -> Vector a
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
V.convert Vector a
column)
get (Just (OptionalColumn Vector (Maybe a)
column)) = (Maybe a -> Text) -> Vector (Maybe a) -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (Maybe a -> String) -> Maybe a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe a -> String
forall a. Show a => a -> String
show) Vector (Maybe a)
column
getTextColumnFromFrame :: DataFrame -> (Integer, Text) -> Vector Text
getTextColumnFromFrame DataFrame
df (Integer
i, Text
name) = if Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0
then [Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList ((Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (String -> Text
T.pack (String -> Text) -> (Int -> String) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String
forall a. Show a => a -> String
show) [Int
0..((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)])
else Maybe Column -> Vector Text
get (Maybe Column -> Vector Text) -> Maybe Column -> Vector Text
forall a b. (a -> b) -> a -> b
$ Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
(V.!?) (DataFrame -> Vector Column
columns DataFrame
d) (Map Text Int -> Text -> Int
forall k a. Ord k => Map k a -> k -> a
(M.!) (DataFrame -> Map Text Int
columnIndices DataFrame
d) Text
name)
rows :: [[Text]]
rows =
[[Text]] -> [[Text]]
forall a. [[a]] -> [[a]]
transpose ([[Text]] -> [[Text]]) -> [[Text]] -> [[Text]]
forall a b. (a -> b) -> a -> b
$
(Integer -> Text -> [Text]) -> [Integer] -> [Text] -> [[Text]]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (((Integer, Text) -> [Text]) -> Integer -> Text -> [Text]
forall a b c. ((a, b) -> c) -> a -> b -> c
curry (Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList (Vector Text -> [Text])
-> ((Integer, Text) -> Vector Text) -> (Integer, Text) -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> (Integer, Text) -> Vector Text
getTextColumnFromFrame DataFrame
d)) [Integer
0..] [Text]
header
in Bool -> [Text] -> [Text] -> [[Text]] -> Text
showTable Bool
properMarkdown [Text]
header (Text
"Int"Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
types) [[Text]]
rows
empty :: DataFrame
empty :: DataFrame
empty = DataFrame {columns :: Vector Column
columns = Vector Column
forall a. Vector a
V.empty,
columnIndices :: Map Text Int
columnIndices = Map Text Int
forall k a. Map k a
M.empty,
dataframeDimensions :: (Int, Int)
dataframeDimensions = (Int
0, Int
0) }
getColumn :: T.Text -> DataFrame -> Maybe Column
getColumn :: Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df = do
Int
i <- DataFrame -> Map Text Int
columnIndices DataFrame
df Map Text Int -> Text -> Maybe Int
forall k a. Ord k => Map k a -> k -> Maybe a
M.!? Text
name
DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? Int
i
unsafeGetColumn :: T.Text -> DataFrame -> Column
unsafeGetColumn :: Text -> DataFrame -> Column
unsafeGetColumn Text
name DataFrame
df = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! (DataFrame -> Map Text Int
columnIndices DataFrame
df Map Text Int -> Text -> Int
forall k a. Ord k => Map k a -> k -> a
M.! Text
name)
null :: DataFrame -> Bool
null :: DataFrame -> Bool
null DataFrame
df = Vector Column -> Bool
forall a. Vector a -> Bool
V.null (DataFrame -> Vector Column
columns DataFrame
df)
toMatrix :: DataFrame -> V.Vector (VU.Vector Float)
toMatrix :: DataFrame -> Vector (Vector Float)
toMatrix DataFrame
df = let
m :: Vector (Vector Double)
m = (Column -> Vector Double)
-> Vector Column -> Vector (Vector Double)
forall a b. (a -> b) -> Vector a -> Vector b
V.map (forall a. Columnable a => Column -> Vector a
toVector @Double) (DataFrame -> Vector Column
columns DataFrame
df)
in Int -> (Int -> Vector Float) -> Vector (Vector Float)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df)) (\Int
i -> (Vector Float -> Int -> Vector Float)
-> Vector Float -> [Int] -> Vector Float
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\Vector Float
acc Int
j -> Vector Float
acc Vector Float -> Float -> Vector Float
forall a. Unbox a => Vector a -> a -> Vector a
`VU.snoc` (Double -> Float
forall a b. (Real a, Fractional b) => a -> b
realToFrac ((Vector (Vector Double)
m Vector (Vector Double) -> Int -> Vector Double
forall a. Vector a -> Int -> a
V.! Int
j) Vector Double -> Int -> Double
forall a. Vector a -> Int -> a
V.! Int
i))) Vector Float
forall a. Unbox a => Vector a
VU.empty [Int
0..(Vector (Vector Double) -> Int
forall a. Vector a -> Int
V.length Vector (Vector Double)
m Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)])
columnAsVector :: forall a . Columnable a => T.Text -> DataFrame -> V.Vector a
columnAsVector :: forall a. Columnable a => Text -> DataFrame -> Vector a
columnAsVector Text
name DataFrame
df = case Text -> DataFrame -> Column
unsafeGetColumn Text
name DataFrame
df of
(BoxedColumn (Vector a
col :: V.Vector b)) -> case TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
Maybe (a :~: a)
Nothing -> String -> Vector a
forall a. HasCallStack => String -> a
error String
"Type error"
Just a :~: a
Refl -> Vector a
Vector a
col
(OptionalColumn (Vector (Maybe a)
col :: V.Vector b)) -> case TypeRep a -> TypeRep (Maybe a) -> Maybe (a :~: Maybe a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
Maybe (a :~: Maybe a)
Nothing -> String -> Vector a
forall a. HasCallStack => String -> a
error String
"Type error"
Just a :~: Maybe a
Refl -> Vector a
Vector (Maybe a)
col
(UnboxedColumn (Vector a
col :: VU.Vector b)) -> case TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
Maybe (a :~: a)
Nothing -> String -> Vector a
forall a. HasCallStack => String -> a
error String
"Type error"
Just a :~: a
Refl -> Vector a -> Vector a
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VG.convert Vector a
Vector a
col