{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE FlexibleContexts #-}
module DataFrame.Internal.DataFrame where

import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU

import Control.Monad (join)
import DataFrame.Display.Terminal.PrettyPrint
import DataFrame.Internal.Column
import Data.Function (on)
import Data.List (sortBy, transpose)
import Data.Maybe (isJust)
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Type.Reflection (typeRep)

-- | An in-memory dataframe: a vector of column slots plus the bookkeeping
-- needed to find a column by name.
data DataFrame = DataFrame
  { -- | The column slots of the frame. A slot holds 'Nothing' when no
    -- column occupies it (every slot of 'empty' starts out that way).
    columns :: V.Vector (Maybe Column)
  , -- | Maps each column name to the index of its slot in 'columns'.
    columnIndices :: M.Map T.Text Int
  , -- | Slot indices in 'columns' that are still free for new columns.
    freeIndices :: [Int]
  , -- | Size of the frame. The first component is used as the row count
    -- when rendering; the second is presumably the column count
    -- (NOTE(review): confirm against the code that updates this field).
    dataframeDimensions :: (Int, Int)
  }

-- | Two dataframes are equal when they have exactly the same column names
-- and every name refers to equal column contents in both frames. The slot a
-- column occupies may differ between the two frames; 'freeIndices' and
-- 'dataframeDimensions' are not compared.
instance Eq DataFrame where
  (==) :: DataFrame -> DataFrame -> Bool
  a == b =
    M.keys (columnIndices a) == M.keys (columnIndices b)
      && all sameColumn (M.toList (columnIndices a))
    where
      -- Compare one named column of @a@ with the column of the same name in
      -- @b@. The lookup is total, so this helper does not rely on the key
      -- check above having already succeeded.
      sameColumn (name, indexA) =
        case M.lookup name (columnIndices b) of
          Nothing -> False
          Just indexB -> columns a V.!? indexA == columns b V.!? indexB

-- | Shows a dataframe as the plain (non-markdown) table produced by 'asText'.
instance Show DataFrame where
  show :: DataFrame -> String
  show = T.unpack . flip asText False

-- | Render a dataframe as a text table.
--
-- The first rendered column is a synthetic @index@ column (typed @Int@) that
-- numbers the rows from zero. The remaining columns follow in ascending order
-- of the slot they occupy in 'columns'. When @properMarkdown@ is 'True' the
-- table is laid out with 'showTableProperMarkdown', otherwise with
-- 'showTable'.
asText :: DataFrame -> Bool -> T.Text
asText d properMarkdown =
  let -- Column names ordered by the slot they occupy in 'columns'.
      orderedNames = map fst (sortBy (compare `on` snd) (M.toList (columnIndices d)))
      header = "index" : orderedNames
      -- Type names of the occupied slots, in slot order; empty slots
      -- produce "" and are filtered out.
      types = V.toList $ V.filter (/= "") $ V.map getType (columns d)
      -- Name of the element type stored in a column slot.
      getType :: Maybe Column -> T.Text
      getType Nothing = ""
      getType (Just (BoxedColumn (_ :: V.Vector a))) = T.pack $ show (typeRep @a)
      getType (Just (UnboxedColumn (_ :: VU.Vector a))) = T.pack $ show (typeRep @a)
      getType (Just (OptionalColumn (_ :: V.Vector a))) = T.pack $ show (typeRep @a)
      getType (Just (GroupedBoxedColumn (_ :: V.Vector a))) = T.pack $ show (typeRep @a)
      getType (Just (GroupedUnboxedColumn (_ :: V.Vector a))) = T.pack $ show (typeRep @a)
      -- Generic rendering of every cell through its 'Show' instance.
      showAll :: Show a => V.Vector a -> V.Vector T.Text
      showAll = V.map (T.pack . show)
      -- Separate out cases dynamically so we don't end up making round trip
      -- string copies: 'T.Text' columns are used as they are and 'String'
      -- columns are packed directly instead of going through 'show'.
      --
      -- There is deliberately no equation for 'Nothing' (unchanged from the
      -- original): presumably every index recorded in 'columnIndices' refers
      -- to an occupied slot -- TODO confirm that invariant.
      get :: Maybe Column -> V.Vector T.Text
      get (Just (BoxedColumn (column :: V.Vector a))) =
        case testEquality (typeRep @a) (typeRep @T.Text) of
          Just Refl -> column
          Nothing -> case testEquality (typeRep @a) (typeRep @String) of
            Just Refl -> V.map T.pack column
            Nothing -> showAll column
      get (Just (UnboxedColumn column)) = showAll (V.convert column)
      get (Just (OptionalColumn column)) = showAll column
      get (Just (GroupedBoxedColumn column)) = showAll column
      get (Just (GroupedUnboxedColumn column)) = showAll column
      -- Cells of the synthetic index column: 0 .. (row count - 1).
      indexColumn = map (T.pack . show) [0 .. fst (dataframeDimensions d) - 1]
      -- Cells of the named column, looked up through 'columnIndices'.
      columnText name = V.toList $ get $ columns d V.! (columnIndices d M.! name)
      -- The table is assembled column by column and then flipped into rows.
      rows = transpose (indexColumn : map columnText orderedNames)
   in (if properMarkdown then showTableProperMarkdown else showTable) header ("Int" : types) rows

-- | O(1) A dataframe with no columns and no rows. It starts with
-- 'initialColumnSize' unoccupied column slots, all of which are listed as
-- free.
empty :: DataFrame
empty =
  DataFrame
    { columns = V.replicate initialColumnSize Nothing
    , columnIndices = M.empty
    , freeIndices = [0 .. initialColumnSize - 1]
    , dataframeDimensions = (0, 0)
    }

-- | Number of column slots that 'empty' allocates up front.
initialColumnSize :: Int
initialColumnSize = 8

-- | Look up a column by name.
--
-- Returns 'Nothing' when the name is not present in 'columnIndices', when
-- the recorded index lies outside 'columns', or when the slot at that index
-- is empty.
getColumn :: T.Text -> DataFrame -> Maybe Column
getColumn name df =
  M.lookup name (columnIndices df) >>= \slot -> join (columns df V.!? slot)

-- | 'True' exactly when the recorded dimensions of the frame are @(0, 0)@.
null :: DataFrame -> Bool
null df = case dataframeDimensions df of
  (0, 0) -> True
  _ -> False

-- | Dump of the frame's internal bookkeeping, one item per line: the
-- name-to-index map, the 'columnVersionString' of every column slot, the free
-- slot indices and the dimensions. There is no trailing newline.
metadata :: DataFrame -> String
metadata df =
  concat [indexLine, "\n", versionLine, "\n", freeLine, "\n", dimensionLine]
  where
    indexLine = show (columnIndices df)
    versionLine = show (V.map (fmap columnVersionString) (columns df))
    freeLine = show (freeIndices df)
    dimensionLine = show (dataframeDimensions df)