{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Internal.Row where
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Merge as VA
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import Control.Exception (throw)
import Control.Monad.ST (runST)
import Data.Function (on)
import Data.Maybe (fromMaybe)
import Data.Type.Equality (TestEquality (..))
import Data.Typeable (type (:~:) (..))
import DataFrame.Errors (DataFrameException (..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame
import Text.ParserCombinators.ReadPrec (ReadPrec)
import Text.Read (
Lexeme (Ident),
lexP,
parens,
readListPrec,
readListPrecDefault,
readPrec,
)
import Type.Reflection (typeOf, typeRep)
data Any where
Value :: (Columnable a) => a -> Any
instance Eq Any where
(==) :: Any -> Any -> Bool
(Value a
a) == :: Any -> Any -> Bool
== (Value a
b) = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
False (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ do
a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
a) (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
b)
Bool -> Maybe Bool
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ a
a a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
a
b
instance Ord Any where
(<=) :: Any -> Any -> Bool
(Value a
a) <= :: Any -> Any -> Bool
<= (Value a
b) = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
False (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ do
a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
a) (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
b)
Bool -> Maybe Bool
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ a
a a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
a
b
instance Show Any where
show :: Any -> String
show :: Any -> [Char]
show (Value a
a) = Text -> [Char]
T.unpack (a -> Text
forall a. Columnable a => a -> Text
showValue a
a)
showValue :: forall a. (Columnable a) => a -> T.Text
showValue :: forall a. Columnable a => a -> Text
showValue a
v = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
Just a :~: Text
Refl -> a
Text
v
Maybe (a :~: Text)
Nothing -> case TypeRep a -> TypeRep [Char] -> Maybe (a :~: [Char])
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @String) of
Just a :~: [Char]
Refl -> [Char] -> Text
T.pack a
[Char]
v
Maybe (a :~: [Char])
Nothing -> ([Char] -> Text
T.pack ([Char] -> Text) -> (a -> [Char]) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> [Char]
forall a. Show a => a -> [Char]
show) a
v
instance Read Any where
readListPrec :: ReadPrec [Any]
readListPrec :: ReadPrec [Any]
readListPrec = ReadPrec [Any]
forall a. Read a => ReadPrec [a]
readListPrecDefault
readPrec :: ReadPrec Any
readPrec :: ReadPrec Any
readPrec = ReadPrec Any -> ReadPrec Any
forall a. ReadPrec a -> ReadPrec a
parens (ReadPrec Any -> ReadPrec Any) -> ReadPrec Any -> ReadPrec Any
forall a b. (a -> b) -> a -> b
$ do
Ident [Char]
"Value" <- ReadPrec Lexeme
lexP
ReadPrec Any
forall a. Read a => ReadPrec a
readPrec
toAny :: forall a. (Columnable a) => a -> Any
toAny :: forall a. Columnable a => a -> Any
toAny = a -> Any
forall a. Columnable a => a -> Any
Value
fromAny :: forall a. (Columnable a) => Any -> Maybe a
fromAny :: forall a. Columnable a => Any -> Maybe a
fromAny (Value (a
v :: b)) = do
a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b)
a -> Maybe a
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
a
v
type Row = V.Vector Any
(!?) :: [a] -> Int -> Maybe a
!? :: forall a. [a] -> Int -> Maybe a
(!?) [] Int
_ = Maybe a
forall a. Maybe a
Nothing
(!?) (a
x : [a]
_) Int
0 = a -> Maybe a
forall a. a -> Maybe a
Just a
x
(!?) (a
x : [a]
xs) Int
n = [a] -> Int -> Maybe a
forall a. [a] -> Int -> Maybe a
(!?) [a]
xs (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
mkColumnFromRow :: Int -> [[Any]] -> Column
mkColumnFromRow :: Int -> [[Any]] -> Column
mkColumnFromRow Int
i [[Any]]
rows = case [[Any]]
rows of
[] -> [Text] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([] :: [T.Text])
([Any]
row : [[Any]]
_) -> case [Any]
row [Any] -> Int -> Maybe Any
forall a. [a] -> Int -> Maybe a
!? Int
i of
Maybe Any
Nothing -> [Text] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([] :: [T.Text])
Just (Value (a
v :: a)) -> [a] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([a] -> Column) -> [a] -> Column
forall a b. (a -> b) -> a -> b
$ [a] -> [a]
forall a. [a] -> [a]
reverse ([a] -> [a]) -> [a] -> [a]
forall a b. (a -> b) -> a -> b
$ ([a] -> [Any] -> [a]) -> [a] -> [[Any]] -> [a]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' [a] -> [Any] -> [a]
addToList [a
v] (Int -> [[Any]] -> [[Any]]
forall a. Int -> [a] -> [a]
drop Int
1 [[Any]]
rows)
where
addToList :: [a] -> [Any] -> [a]
addToList [a]
acc [Any]
r = case [Any]
r [Any] -> Int -> Maybe Any
forall a. [a] -> Int -> Maybe a
!? Int
i of
Maybe Any
Nothing -> [a]
acc
Just (Value (a
v' :: b)) -> case TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
Maybe (a :~: a)
Nothing -> [a]
acc
Just a :~: a
Refl -> a
v' a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
[a]
acc
toRowList :: DataFrame -> [Row]
toRowList :: DataFrame -> [Row]
toRowList DataFrame
df =
let
nameSet :: Set Text
nameSet =
[Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)))
in
(Int -> Row) -> [Int] -> [Row]
forall a b. (a -> b) -> [a] -> [b]
map (DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
nameSet) [Int
0 .. ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)]
toRowVector :: [T.Text] -> DataFrame -> V.Vector Row
toRowVector :: [Text] -> DataFrame -> Vector Row
toRowVector [Text]
names DataFrame
df =
let
nameSet :: Set Text
nameSet = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList [Text]
names
in
Int -> (Int -> Row) -> Vector Row
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df)) (DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
nameSet)
mkRowFromArgs :: [T.Text] -> DataFrame -> Int -> Row
mkRowFromArgs :: [Text] -> DataFrame -> Int -> Row
mkRowFromArgs [Text]
names DataFrame
df Int
i = (Text -> Any) -> Vector Text -> Row
forall a b. (a -> b) -> Vector a -> Vector b
V.map Text -> Any
get ([Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text]
names)
where
get :: Text -> Any
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
Maybe Column
Nothing ->
DataFrameException -> Any
forall a e. Exception e => e -> a
throw (DataFrameException -> Any) -> DataFrameException -> Any
forall a b. (a -> b) -> a -> b
$
Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException
Text
name
Text
"[INTERNAL] mkRowFromArgs"
(Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
Just (BoxedColumn Vector a
column) -> a -> Any
forall a. Columnable a => a -> Any
toAny (Vector a
column Vector a -> Int -> a
forall a. Vector a -> Int -> a
V.! Int
i)
Just (UnboxedColumn Vector a
column) -> a -> Any
forall a. Columnable a => a -> Any
toAny (Vector a
column Vector a -> Int -> a
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i)
Just (OptionalColumn Vector (Maybe a)
column) -> Maybe a -> Any
forall a. Columnable a => a -> Any
toAny (Vector (Maybe a)
column Vector (Maybe a) -> Int -> Maybe a
forall a. Vector a -> Int -> a
V.! Int
i)
mkRowRep :: DataFrame -> S.Set T.Text -> Int -> Row
mkRowRep :: DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
names Int
i = Int -> (Int -> Any) -> Row
forall a. Int -> (Int -> a) -> Vector a
V.generate (Set Text -> Int
forall a. Set a -> Int
S.size Set Text
names) (\Int
index -> Text -> Any
get (Vector Text
names' Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
index))
where
inOrderIndexes :: [Text]
inOrderIndexes = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)
names' :: Vector Text
names' = [Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text
n | Text
n <- [Text]
inOrderIndexes, Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member Text
n Set Text
names]
throwError :: Text -> Any
throwError Text
name =
[Char] -> Any
forall a. HasCallStack => [Char] -> a
error ([Char] -> Any) -> [Char] -> Any
forall a b. (a -> b) -> a -> b
$
[Char]
"Column "
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" has less items than "
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"the other columns at index "
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i
get :: Text -> Any
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
Just (BoxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
Just a
e -> a -> Any
forall a. Columnable a => a -> Any
toAny a
e
Maybe a
Nothing -> Text -> Any
throwError Text
name
Just (OptionalColumn Vector (Maybe a)
c) -> case Vector (Maybe a)
c Vector (Maybe a) -> Int -> Maybe (Maybe a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
Just Maybe a
e -> Maybe a -> Any
forall a. Columnable a => a -> Any
toAny Maybe a
e
Maybe (Maybe a)
Nothing -> Text -> Any
throwError Text
name
Just (UnboxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Unbox a => Vector a -> Int -> Maybe a
VU.!? Int
i of
Just a
e -> a -> Any
forall a. Columnable a => a -> Any
toAny a
e
Maybe a
Nothing -> Text -> Any
throwError Text
name
Maybe Column
Nothing ->
DataFrameException -> Any
forall a e. Exception e => e -> a
throw (DataFrameException -> Any) -> DataFrameException -> Any
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
name Text
"mkRowRep" (Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
sortedIndexes' :: Bool -> V.Vector Row -> VU.Vector Int
sortedIndexes' :: Bool -> Vector Row -> Vector Int
sortedIndexes' Bool
asc Vector Row
rows = (forall s. ST s (Vector Int)) -> Vector Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector Int)) -> Vector Int)
-> (forall s. ST s (Vector Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
MVector s (Int, Row)
withIndexes <- Vector (Int, Row)
-> ST s (Mutable Vector (PrimState (ST s)) (Int, Row))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
v a -> m (Mutable v (PrimState m) a)
VG.thaw (Vector Row -> Vector (Int, Row)
forall a. Vector a -> Vector (Int, a)
V.indexed Vector Row
rows)
Comparison (Int, Row)
-> MVector (PrimState (ST s)) (Int, Row) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VA.sortBy ((if Bool
asc then Row -> Row -> Ordering
forall a. Ord a => a -> a -> Ordering
compare else (Row -> Row -> Ordering) -> Row -> Row -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Row -> Row -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) (Row -> Row -> Ordering)
-> ((Int, Row) -> Row) -> Comparison (Int, Row)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Int, Row) -> Row
forall a b. (a, b) -> b
snd) MVector s (Int, Row)
MVector (PrimState (ST s)) (Int, Row)
withIndexes
Vector (Int, Row)
sorted <- Mutable Vector (PrimState (ST s)) (Int, Row)
-> ST s (Vector (Int, Row))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
VG.unsafeFreeze Mutable Vector (PrimState (ST s)) (Int, Row)
MVector s (Int, Row)
withIndexes
Vector Int -> ST s (Vector Int)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Int -> ST s (Vector Int))
-> Vector Int -> ST s (Vector Int)
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Int) -> Vector Int
forall a. Unbox a => Int -> (Int -> a) -> Vector a
VU.generate (Vector Row -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length Vector Row
rows) (\Int
i -> (Int, Row) -> Int
forall a b. (a, b) -> a
fst (Vector (Int, Row)
sorted Vector (Int, Row) -> Int -> (Int, Row)
forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
VG.! Int
i))