{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE FlexibleContexts #-}
module DataFrame.Internal.DataFrame where

import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Generic as VG

import Control.Monad (join)
import DataFrame.Display.Terminal.PrettyPrint
import DataFrame.Internal.Column
import Data.Function (on)
import Data.List (sortBy, transpose, (\\))
import Data.Maybe (isJust)
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Text.Printf
import Type.Reflection (typeRep)

data DataFrame = DataFrame
  { -- | Our main data structure stores a dataframe as
    -- a vector of columns. This improv
    DataFrame -> Vector Column
columns :: V.Vector Column,
    -- | Keeps the column names in the order they were inserted in.
    DataFrame -> Map Text Int
columnIndices :: M.Map T.Text Int,
    DataFrame -> (Int, Int)
dataframeDimensions :: (Int, Int)
  }

data GroupedDataFrame = Grouped {
  GroupedDataFrame -> DataFrame
fullDataframe :: DataFrame,
  GroupedDataFrame -> [Text]
groupedColumns :: [T.Text],
  GroupedDataFrame -> Vector Int
valueIndices :: VU.Vector Int,
  GroupedDataFrame -> Vector Int
offsets :: VU.Vector Int
}

instance Show GroupedDataFrame where
  show :: GroupedDataFrame -> String
show (Grouped DataFrame
df [Text]
cols Vector Int
indices Vector Int
os) = String -> String -> ShowS
forall r. PrintfType r => String -> r
printf String
"{ keyColumns: %s groupedColumns: %s }" 
                                             ([Text] -> String
forall a. Show a => a -> String
show [Text]
cols)
                                             ([Text] -> String
forall a. Show a => a -> String
show ((Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (DataFrame -> Map Text Int
columnIndices DataFrame
df)) [Text] -> [Text] -> [Text]
forall a. Eq a => [a] -> [a] -> [a]
\\ [Text]
cols))

instance Eq GroupedDataFrame where
  == :: GroupedDataFrame -> GroupedDataFrame -> Bool
(==) (Grouped DataFrame
df [Text]
cols Vector Int
indices Vector Int
os) (Grouped DataFrame
df' [Text]
cols' Vector Int
indices' Vector Int
os') = (DataFrame
df DataFrame -> DataFrame -> Bool
forall a. Eq a => a -> a -> Bool
== DataFrame
df') Bool -> Bool -> Bool
&& ([Text]
cols [Text] -> [Text] -> Bool
forall a. Eq a => a -> a -> Bool
== [Text]
cols') 


instance Eq DataFrame where
  (==) :: DataFrame -> DataFrame -> Bool
  DataFrame
a == :: DataFrame -> DataFrame -> Bool
== DataFrame
b = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
a) [Text] -> [Text] -> Bool
forall a. Eq a => a -> a -> Bool
== ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
b) Bool -> Bool -> Bool
&&
           ((Text, Int) -> Bool -> Bool) -> Bool -> [(Text, Int)] -> Bool
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\(Text
name, Int
index) Bool
acc -> Bool
acc Bool -> Bool -> Bool
&& (DataFrame -> Vector Column
columns DataFrame
a Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? Int
index Maybe Column -> Maybe Column -> Bool
forall a. Eq a => a -> a -> Bool
== (DataFrame -> Vector Column
columns DataFrame
b Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? (DataFrame -> Map Text Int
columnIndices DataFrame
b Map Text Int -> Text -> Int
forall k a. Ord k => Map k a -> k -> a
M.! Text
name)))) Bool
True (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
a)

instance Show DataFrame where
  show :: DataFrame -> String
  show :: DataFrame -> String
show DataFrame
d = Text -> String
T.unpack (DataFrame -> Bool -> Text
asText DataFrame
d Bool
False)

asText :: DataFrame -> Bool -> T.Text
asText :: DataFrame -> Bool -> Text
asText DataFrame
d Bool
properMarkdown =
  let header :: [Text]
header = Text
"index" Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
d))
      types :: [Text]
types = Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList (Vector Text -> [Text]) -> Vector Text -> [Text]
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> Vector Text -> Vector Text
forall a. (a -> Bool) -> Vector a -> Vector a
V.filter (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"") (Vector Text -> Vector Text) -> Vector Text -> Vector Text
forall a b. (a -> b) -> a -> b
$ (Column -> Text) -> Vector Column -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map Column -> Text
getType (DataFrame -> Vector Column
columns DataFrame
d)
      getType :: Column -> T.Text
      getType :: Column -> Text
getType (BoxedColumn (Vector a
column :: V.Vector a)) = String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a)
      getType (UnboxedColumn (Vector a
column :: VU.Vector a)) = String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a)
      getType (OptionalColumn (Vector (Maybe a)
column :: V.Vector a)) = String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ TypeRep (Maybe a) -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a)
      -- Separate out cases dynamically so we don't end up making round trip string
      -- copies.
      get :: Maybe Column -> V.Vector T.Text
      get :: Maybe Column -> Vector Text
get (Just (BoxedColumn (Vector a
column :: V.Vector a))) = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
              Just a :~: Text
Refl -> Vector a
Vector Text
column
              Maybe (a :~: Text)
Nothing -> case TypeRep a -> TypeRep String -> Maybe (a :~: String)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @String) of
                Just a :~: String
Refl -> (String -> Text) -> Vector String -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map String -> Text
T.pack Vector a
Vector String
column
                Maybe (a :~: String)
Nothing -> (a -> Text) -> Vector a -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) Vector a
column
      get (Just (UnboxedColumn Vector a
column)) = (a -> Text) -> Vector a -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) (Vector a -> Vector a
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
V.convert Vector a
column)
      get (Just (OptionalColumn Vector (Maybe a)
column)) = (Maybe a -> Text) -> Vector (Maybe a) -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (Maybe a -> String) -> Maybe a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe a -> String
forall a. Show a => a -> String
show) Vector (Maybe a)
column
      getTextColumnFromFrame :: DataFrame -> (Integer, Text) -> Vector Text
getTextColumnFromFrame DataFrame
df (Integer
i, Text
name) = if Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0
                                            then [Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList ((Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (String -> Text
T.pack (String -> Text) -> (Int -> String) -> Int -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String
forall a. Show a => a -> String
show) [Int
0..((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)])
                                            else Maybe Column -> Vector Text
get (Maybe Column -> Vector Text) -> Maybe Column -> Vector Text
forall a b. (a -> b) -> a -> b
$ Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
(V.!?) (DataFrame -> Vector Column
columns DataFrame
d) (Map Text Int -> Text -> Int
forall k a. Ord k => Map k a -> k -> a
(M.!) (DataFrame -> Map Text Int
columnIndices DataFrame
d) Text
name)
      rows :: [[Text]]
rows =
        [[Text]] -> [[Text]]
forall a. [[a]] -> [[a]]
transpose ([[Text]] -> [[Text]]) -> [[Text]] -> [[Text]]
forall a b. (a -> b) -> a -> b
$
          (Integer -> Text -> [Text]) -> [Integer] -> [Text] -> [[Text]]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (((Integer, Text) -> [Text]) -> Integer -> Text -> [Text]
forall a b c. ((a, b) -> c) -> a -> b -> c
curry (Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList (Vector Text -> [Text])
-> ((Integer, Text) -> Vector Text) -> (Integer, Text) -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> (Integer, Text) -> Vector Text
getTextColumnFromFrame DataFrame
d)) [Integer
0..] [Text]
header
   in Bool -> [Text] -> [Text] -> [[Text]] -> Text
showTable Bool
properMarkdown [Text]
header (Text
"Int"Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
types) [[Text]]
rows

-- | O(1) Creates an empty dataframe
empty :: DataFrame
empty :: DataFrame
empty = DataFrame {columns :: Vector Column
columns = Vector Column
forall a. Vector a
V.empty,
                   columnIndices :: Map Text Int
columnIndices = Map Text Int
forall k a. Map k a
M.empty,
                   dataframeDimensions :: (Int, Int)
dataframeDimensions = (Int
0, Int
0) }

getColumn :: T.Text -> DataFrame -> Maybe Column
getColumn :: Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df = do
  Int
i <- DataFrame -> Map Text Int
columnIndices DataFrame
df Map Text Int -> Text -> Maybe Int
forall k a. Ord k => Map k a -> k -> Maybe a
M.!? Text
name
  DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Maybe Column
forall a. Vector a -> Int -> Maybe a
V.!? Int
i

unsafeGetColumn :: T.Text -> DataFrame -> Column
unsafeGetColumn :: Text -> DataFrame -> Column
unsafeGetColumn Text
name DataFrame
df = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! (DataFrame -> Map Text Int
columnIndices DataFrame
df Map Text Int -> Text -> Int
forall k a. Ord k => Map k a -> k -> a
M.! Text
name)

null :: DataFrame -> Bool
null :: DataFrame -> Bool
null DataFrame
df = Vector Column -> Bool
forall a. Vector a -> Bool
V.null (DataFrame -> Vector Column
columns DataFrame
df)

toMatrix :: DataFrame -> V.Vector (VU.Vector Float)
toMatrix :: DataFrame -> Vector (Vector Float)
toMatrix DataFrame
df = let
    m :: Vector (Vector Double)
m = (Column -> Vector Double)
-> Vector Column -> Vector (Vector Double)
forall a b. (a -> b) -> Vector a -> Vector b
V.map (forall a. Columnable a => Column -> Vector a
toVector @Double) (DataFrame -> Vector Column
columns DataFrame
df)
  in Int -> (Int -> Vector Float) -> Vector (Vector Float)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df)) (\Int
i -> (Vector Float -> Int -> Vector Float)
-> Vector Float -> [Int] -> Vector Float
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\Vector Float
acc Int
j -> Vector Float
acc Vector Float -> Float -> Vector Float
forall a. Unbox a => Vector a -> a -> Vector a
`VU.snoc` (Double -> Float
forall a b. (Real a, Fractional b) => a -> b
realToFrac ((Vector (Vector Double)
m Vector (Vector Double) -> Int -> Vector Double
forall a. Vector a -> Int -> a
V.! Int
j) Vector Double -> Int -> Double
forall a. Vector a -> Int -> a
V.! Int
i))) Vector Float
forall a. Unbox a => Vector a
VU.empty [Int
0..(Vector (Vector Double) -> Int
forall a. Vector a -> Int
V.length Vector (Vector Double)
m Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)])

columnAsVector :: forall a . Columnable a => T.Text -> DataFrame -> V.Vector a
columnAsVector :: forall a. Columnable a => Text -> DataFrame -> Vector a
columnAsVector Text
name DataFrame
df = case Text -> DataFrame -> Column
unsafeGetColumn Text
name DataFrame
df of
  (BoxedColumn (Vector a
col :: V.Vector b))    -> case TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
    Maybe (a :~: a)
Nothing   -> String -> Vector a
forall a. HasCallStack => String -> a
error String
"Type error"
    Just a :~: a
Refl -> Vector a
Vector a
col
  (OptionalColumn (Vector (Maybe a)
col :: V.Vector b)) -> case TypeRep a -> TypeRep (Maybe a) -> Maybe (a :~: Maybe a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
    Maybe (a :~: Maybe a)
Nothing   -> String -> Vector a
forall a. HasCallStack => String -> a
error String
"Type error"
    Just a :~: Maybe a
Refl -> Vector a
Vector (Maybe a)
col
  (UnboxedColumn (Vector a
col :: VU.Vector b)) -> case TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
    Maybe (a :~: a)
Nothing   -> String -> Vector a
forall a. HasCallStack => String -> a
error String
"Type error"
    Just a :~: a
Refl -> Vector a -> Vector a
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VG.convert Vector a
Vector a
col