{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE FlexibleContexts #-}
module DataFrame.Internal.DataFrame where
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Control.Monad (join)
import DataFrame.Display.Terminal.PrettyPrint
import DataFrame.Internal.Column
import Data.Function (on)
import Data.List (sortBy, transpose)
import Data.Maybe (isJust)
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Type.Reflection (typeRep)
-- | A column-oriented table.
--
-- Columns live in numbered slots of a vector and are addressed by name through
-- a name-to-slot map. This module enables @StrictData@, so every field below
-- is strict.
data DataFrame = DataFrame
  { -- | Column storage, one slot per position. A slot holding 'Nothing' is
    -- empty.
    columns :: V.Vector (Maybe Column),
    -- | Maps each column name to the slot of 'columns' that holds it.
    columnIndices :: M.Map T.Text Int,
    -- | Slot indices available for new columns. 'empty' initialises this to
    -- every slot index. NOTE(review): how insertion consumes this list is not
    -- visible in this module.
    freeIndices :: [Int],
    -- | Size of the frame. The first component is used as the row count when
    -- rendering (see 'asText'); the second is presumably the column count.
    -- TODO confirm against the code that maintains it.
    dataframeDimensions :: (Int, Int)
  }
-- | Two frames are equal when they contain the same column names and every
-- name refers to equal column slots in both frames.
--
-- The slot a name occupies may differ between the two frames; only the slot
-- contents are compared. 'freeIndices' and 'dataframeDimensions' do not take
-- part in the comparison.
instance Eq DataFrame where
  (==) :: DataFrame -> DataFrame -> Bool
  a == b =
    M.keys (columnIndices a) == M.keys (columnIndices b)
      && all sameSlot (M.toList (columnIndices a))
    where
      -- Compare the slot that @name@ occupies in @a@ with the slot the same
      -- name occupies in @b@. The key check above guarantees the name is
      -- present in @b@; the lookup is kept total regardless.
      sameSlot :: (T.Text, Int) -> Bool
      sameSlot (name, index) =
        columns a V.!? index
          == (M.lookup name (columnIndices b) >>= (columns b V.!?))
-- | Shows a frame as the table produced by 'asText' with its Markdown flag
-- switched off.
instance Show DataFrame where
  show :: DataFrame -> String
  show d = T.unpack (asText d False)
-- | Render a frame as a text table.
--
-- The first rendered column is a synthetic row index (header @index@, type
-- @Int@) counting from 0; the remaining columns follow in the order of the
-- slot they occupy in 'columns'. When @properMarkdown@ is 'True' the table is
-- laid out with 'showTableProperMarkdown', otherwise with 'showTable'.
asText :: DataFrame -> Bool -> T.Text
asText d properMarkdown = render header ("Int" : types) rows
  where
    render
      | properMarkdown = showTableProperMarkdown
      | otherwise = showTable

    -- Column names ordered by slot, preceded by the header of the index column.
    header :: [T.Text]
    header =
      "index" : map fst (sortBy (compare `on` snd) (M.toList (columnIndices d)))

    -- One type name per occupied slot; empty slots yield "" and are dropped.
    types :: [T.Text]
    types = V.toList (V.filter (/= "") (V.map getType (columns d)))

    -- Name of the type named in each constructor's pattern signature.
    getType :: Maybe Column -> T.Text
    getType Nothing = ""
    getType (Just (BoxedColumn (_ :: V.Vector a))) = T.pack (show (typeRep @a))
    getType (Just (UnboxedColumn (_ :: VU.Vector a))) = T.pack (show (typeRep @a))
    getType (Just (OptionalColumn (_ :: V.Vector a))) = T.pack (show (typeRep @a))
    getType (Just (GroupedBoxedColumn (_ :: V.Vector a))) = T.pack (show (typeRep @a))
    getType (Just (GroupedUnboxedColumn (_ :: V.Vector a))) = T.pack (show (typeRep @a))

    -- Textual cells of a column. Boxed columns of Text are passed through
    -- untouched and boxed columns of String are packed directly, so neither
    -- makes a round trip through 'show'; everything else is rendered with
    -- 'show'.
    get :: Maybe Column -> V.Vector T.Text
    get (Just (BoxedColumn (column :: V.Vector a))) =
      case testEquality (typeRep @a) (typeRep @T.Text) of
        Just Refl -> column
        Nothing -> case testEquality (typeRep @a) (typeRep @String) of
          Just Refl -> V.map T.pack column
          Nothing -> V.map (T.pack . show) column
    get (Just (UnboxedColumn column)) = V.map (T.pack . show) (V.convert column)
    get (Just (OptionalColumn column)) = V.map (T.pack . show) column
    get (Just (GroupedBoxedColumn column)) = V.map (T.pack . show) column
    get (Just (GroupedUnboxedColumn column)) = V.map (T.pack . show) column
    -- A name in 'columnIndices' should always point at an occupied slot; fail
    -- with a readable message instead of a pattern-match failure if not.
    get Nothing = error "asText: column name refers to an empty slot"

    -- Row numbers 0 .. rowCount - 1, rendered as text.
    indexColumn :: [T.Text]
    indexColumn = map (T.pack . show) [0 .. fst (dataframeDimensions d) - 1]

    -- Cells of the named column, resolved through 'getColumn'.
    dataColumn :: T.Text -> [T.Text]
    dataColumn name = V.toList (get (getColumn name d))

    -- Columns are gathered in header order and transposed into rows.
    rows :: [[T.Text]]
    rows = transpose (indexColumn : map dataColumn (drop 1 header))
-- | A frame with no columns and dimensions @(0, 0)@.
--
-- 'initialColumnSize' empty slots are allocated up front and every one of
-- them is listed in 'freeIndices'.
empty :: DataFrame
empty =
  DataFrame
    { columns = V.replicate initialColumnSize Nothing,
      columnIndices = M.empty,
      freeIndices = [0 .. initialColumnSize - 1],
      dataframeDimensions = (0, 0)
    }
-- | Number of column slots allocated by 'empty'.
initialColumnSize :: Int
initialColumnSize = 8
-- | Look up a column by name.
--
-- Returns 'Nothing' when the name is not in 'columnIndices', when its slot
-- index lies outside 'columns', or when the slot is empty.
getColumn :: T.Text -> DataFrame -> Maybe Column
getColumn name df =
  M.lookup name (columnIndices df) >>= \slot -> join (columns df V.!? slot)
-- | 'True' exactly when the frame's recorded dimensions are @(0, 0)@.
--
-- Only 'dataframeDimensions' is inspected; the column storage is not examined.
null :: DataFrame -> Bool
null df = case dataframeDimensions df of
  (0, 0) -> True
  _ -> False
-- | Debug dump of a frame's bookkeeping, as four newline-separated lines with
-- no trailing newline:
--
-- 1. the name-to-slot map,
-- 2. 'columnVersionString' of every slot ('Nothing' for empty slots),
-- 3. the free slot indices,
-- 4. the dimensions.
metadata :: DataFrame -> String
metadata df =
  concat
    [ show (columnIndices df),
      "\n",
      show (V.map (fmap columnVersionString) (columns df)),
      "\n",
      show (freeIndices df),
      "\n",
      show (dataframeDimensions df)
    ]