{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Internal.Row where
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Merge as VA
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import Control.DeepSeq (NFData (..))
import Control.Exception (throw)
import Control.Monad.ST (runST)
import Data.Function (on)
import Data.Maybe (fromMaybe)
import Data.Type.Equality (TestEquality (..))
import Data.Typeable (type (:~:) (..))
import DataFrame.Errors (DataFrameException (..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame
import DataFrame.Internal.Expression (Expr (..))
import Text.ParserCombinators.ReadPrec (ReadPrec)
import Text.Read (
Lexeme (Ident),
lexP,
parens,
readListPrec,
readListPrecDefault,
readPrec,
)
import Type.Reflection (typeOf, typeRep)
data Any where
Value :: (Columnable a) => a -> Any
instance Eq Any where
(==) :: Any -> Any -> Bool
(Value a
a) == :: Any -> Any -> Bool
== (Value a
b) = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
False (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ do
a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
a) (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
b)
Bool -> Maybe Bool
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ a
a a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
a
b
instance Ord Any where
(<=) :: Any -> Any -> Bool
(Value a
a) <= :: Any -> Any -> Bool
<= (Value a
b) = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
False (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ do
a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
a) (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
b)
Bool -> Maybe Bool
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ a
a a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
a
b
instance Show Any where
show :: Any -> String
show :: Any -> [Char]
show (Value a
a) = Text -> [Char]
T.unpack (a -> Text
forall a. Columnable a => a -> Text
showValue a
a)
instance NFData Any where
rnf :: Any -> ()
rnf (Value a
a) = a -> ()
forall a. NFData a => a -> ()
rnf a
a
showValue :: forall a. (Columnable a) => a -> T.Text
showValue :: forall a. Columnable a => a -> Text
showValue a
v = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
Just a :~: Text
Refl -> a
Text
v
Maybe (a :~: Text)
Nothing -> case TypeRep a -> TypeRep [Char] -> Maybe (a :~: [Char])
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @String) of
Just a :~: [Char]
Refl -> [Char] -> Text
T.pack a
[Char]
v
Maybe (a :~: [Char])
Nothing -> ([Char] -> Text
T.pack ([Char] -> Text) -> (a -> [Char]) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> [Char]
forall a. Show a => a -> [Char]
show) a
v
instance Read Any where
readListPrec :: ReadPrec [Any]
readListPrec :: ReadPrec [Any]
readListPrec = ReadPrec [Any]
forall a. Read a => ReadPrec [a]
readListPrecDefault
readPrec :: ReadPrec Any
readPrec :: ReadPrec Any
readPrec = ReadPrec Any -> ReadPrec Any
forall a. ReadPrec a -> ReadPrec a
parens (ReadPrec Any -> ReadPrec Any) -> ReadPrec Any -> ReadPrec Any
forall a b. (a -> b) -> a -> b
$ do
Ident [Char]
"Value" <- ReadPrec Lexeme
lexP
ReadPrec Any
forall a. Read a => ReadPrec a
readPrec
toAny :: forall a. (Columnable a) => a -> Any
toAny :: forall a. Columnable a => a -> Any
toAny = a -> Any
forall a. Columnable a => a -> Any
Value
fromAny :: forall a. (Columnable a) => Any -> Maybe a
fromAny :: forall a. Columnable a => Any -> Maybe a
fromAny (Value (a
v :: b)) = do
a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b)
a -> Maybe a
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
a
v
type Row = V.Vector Any
(!?) :: [a] -> Int -> Maybe a
!? :: forall a. [a] -> Int -> Maybe a
(!?) [] Int
_ = Maybe a
forall a. Maybe a
Nothing
(!?) (a
x : [a]
_) Int
0 = a -> Maybe a
forall a. a -> Maybe a
Just a
x
(!?) (a
x : [a]
xs) Int
n = [a] -> Int -> Maybe a
forall a. [a] -> Int -> Maybe a
(!?) [a]
xs (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
mkColumnFromRow :: Int -> [[Any]] -> Column
mkColumnFromRow :: Int -> [[Any]] -> Column
mkColumnFromRow Int
i [[Any]]
rows = case [[Any]]
rows of
[] -> [Text] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([] :: [T.Text])
([Any]
row : [[Any]]
_) -> case [Any]
row [Any] -> Int -> Maybe Any
forall a. [a] -> Int -> Maybe a
!? Int
i of
Maybe Any
Nothing -> [Text] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([] :: [T.Text])
Just (Value (a
v :: a)) -> [a] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([a] -> Column) -> [a] -> Column
forall a b. (a -> b) -> a -> b
$ [a] -> [a]
forall a. [a] -> [a]
reverse ([a] -> [a]) -> [a] -> [a]
forall a b. (a -> b) -> a -> b
$ ([a] -> [Any] -> [a]) -> [a] -> [[Any]] -> [a]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' [a] -> [Any] -> [a]
addToList [a
v] (Int -> [[Any]] -> [[Any]]
forall a. Int -> [a] -> [a]
drop Int
1 [[Any]]
rows)
where
addToList :: [a] -> [Any] -> [a]
addToList [a]
acc [Any]
r = case [Any]
r [Any] -> Int -> Maybe Any
forall a. [a] -> Int -> Maybe a
!? Int
i of
Maybe Any
Nothing -> [a]
acc
Just (Value (a
v' :: b)) -> case TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
Maybe (a :~: a)
Nothing -> [a]
acc
Just a :~: a
Refl -> a
v' a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
[a]
acc
toRowList :: DataFrame -> [[(T.Text, Any)]]
toRowList :: DataFrame -> [[(Text, Any)]]
toRowList DataFrame
df =
let
names :: [Text]
names = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df))
in
(Int -> [(Text, Any)]) -> [Int] -> [[(Text, Any)]]
forall a b. (a -> b) -> [a] -> [b]
map
([Text] -> [Any] -> [(Text, Any)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
names ([Any] -> [(Text, Any)]) -> (Int -> [Any]) -> Int -> [(Text, Any)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Any -> [Any]
forall a. Vector a -> [a]
V.toList (Vector Any -> [Any]) -> (Int -> Vector Any) -> Int -> [Any]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> [Text] -> Int -> Vector Any
mkRowRep DataFrame
df [Text]
names)
[Int
0 .. ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)]
toRowVector :: [T.Text] -> DataFrame -> V.Vector Row
toRowVector :: [Text] -> DataFrame -> Vector (Vector Any)
toRowVector [Text]
names DataFrame
df = Int -> (Int -> Vector Any) -> Vector (Vector Any)
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df)) (DataFrame -> [Text] -> Int -> Vector Any
mkRowRep DataFrame
df [Text]
names)
rowValue :: forall a. Expr a -> [(T.Text, Any)] -> Maybe a
rowValue :: forall a. Expr a -> [(Text, Any)] -> Maybe a
rowValue (Col Text
name) [(Text, Any)]
row = Text -> [(Text, Any)] -> Maybe Any
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
name [(Text, Any)]
row Maybe Any -> (Any -> Maybe a) -> Maybe a
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a. Columnable a => Any -> Maybe a
fromAny @a
rowValue Expr a
_ [(Text, Any)]
_ = [Char] -> Maybe a
forall a. HasCallStack => [Char] -> a
error [Char]
"Can only get rowValue of column reference"
mkRowFromArgs :: [T.Text] -> DataFrame -> Int -> Row
mkRowFromArgs :: [Text] -> DataFrame -> Int -> Vector Any
mkRowFromArgs [Text]
names DataFrame
df Int
i = (Text -> Any) -> Vector Text -> Vector Any
forall a b. (a -> b) -> Vector a -> Vector b
V.map Text -> Any
get ([Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text]
names)
where
get :: Text -> Any
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
Maybe Column
Nothing ->
DataFrameException -> Any
forall a e. Exception e => e -> a
throw (DataFrameException -> Any) -> DataFrameException -> Any
forall a b. (a -> b) -> a -> b
$
Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException
Text
name
Text
"[INTERNAL] mkRowFromArgs"
(Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
Just (BoxedColumn Vector a
column) -> a -> Any
forall a. Columnable a => a -> Any
toAny (Vector a
column Vector a -> Int -> a
forall a. Vector a -> Int -> a
V.! Int
i)
Just (UnboxedColumn Vector a
column) -> a -> Any
forall a. Columnable a => a -> Any
toAny (Vector a
column Vector a -> Int -> a
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i)
Just (OptionalColumn Vector (Maybe a)
column) -> Maybe a -> Any
forall a. Columnable a => a -> Any
toAny (Vector (Maybe a)
column Vector (Maybe a) -> Int -> Maybe a
forall a. Vector a -> Int -> a
V.! Int
i)
mkRowRep :: DataFrame -> [T.Text] -> Int -> Row
mkRowRep :: DataFrame -> [Text] -> Int -> Vector Any
mkRowRep DataFrame
df [Text]
names Int
i = Int -> (Int -> Any) -> Vector Any
forall a. Int -> (Int -> a) -> Vector a
V.generate ([Text] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
L.length [Text]
names) (\Int
index -> Text -> Any
get (Vector Text
names' Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
index))
where
names' :: Vector Text
names' = [Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text]
names
throwError :: Text -> Any
throwError Text
name =
[Char] -> Any
forall a. HasCallStack => [Char] -> a
error ([Char] -> Any) -> [Char] -> Any
forall a b. (a -> b) -> a -> b
$
[Char]
"Column "
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" has less items than "
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"the other columns at index "
[Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i
get :: Text -> Any
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
Just (BoxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
Just a
e -> a -> Any
forall a. Columnable a => a -> Any
toAny a
e
Maybe a
Nothing -> Text -> Any
throwError Text
name
Just (OptionalColumn Vector (Maybe a)
c) -> case Vector (Maybe a)
c Vector (Maybe a) -> Int -> Maybe (Maybe a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
Just Maybe a
e -> Maybe a -> Any
forall a. Columnable a => a -> Any
toAny Maybe a
e
Maybe (Maybe a)
Nothing -> Text -> Any
throwError Text
name
Just (UnboxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Unbox a => Vector a -> Int -> Maybe a
VU.!? Int
i of
Just a
e -> a -> Any
forall a. Columnable a => a -> Any
toAny a
e
Maybe a
Nothing -> Text -> Any
throwError Text
name
Maybe Column
Nothing ->
DataFrameException -> Any
forall a e. Exception e => e -> a
throw (DataFrameException -> Any) -> DataFrameException -> Any
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
name Text
"mkRowRep" (Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
sortedIndexes' :: [Bool] -> V.Vector Row -> VU.Vector Int
sortedIndexes' :: [Bool] -> Vector (Vector Any) -> Vector Int
sortedIndexes' [Bool]
flipCompare Vector (Vector Any)
rows = (forall s. ST s (Vector Int)) -> Vector Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector Int)) -> Vector Int)
-> (forall s. ST s (Vector Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
MVector s (Int, Vector Any)
withIndexes <- Vector (Int, Vector Any)
-> ST s (Mutable Vector (PrimState (ST s)) (Int, Vector Any))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
v a -> m (Mutable v (PrimState m) a)
VG.thaw (Vector (Vector Any) -> Vector (Int, Vector Any)
forall a. Vector a -> Vector (Int, a)
V.indexed Vector (Vector Any)
rows)
Comparison (Int, Vector Any)
-> MVector (PrimState (ST s)) (Int, Vector Any) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VA.sortBy ([Bool] -> Vector Any -> Vector Any -> Ordering
produceOrderingFromRow [Bool]
flipCompare (Vector Any -> Vector Any -> Ordering)
-> ((Int, Vector Any) -> Vector Any)
-> Comparison (Int, Vector Any)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Int, Vector Any) -> Vector Any
forall a b. (a, b) -> b
snd) MVector s (Int, Vector Any)
MVector (PrimState (ST s)) (Int, Vector Any)
withIndexes
Vector (Int, Vector Any)
sorted <- Mutable Vector (PrimState (ST s)) (Int, Vector Any)
-> ST s (Vector (Int, Vector Any))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
VG.unsafeFreeze Mutable Vector (PrimState (ST s)) (Int, Vector Any)
MVector s (Int, Vector Any)
withIndexes
Vector Int -> ST s (Vector Int)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Int -> ST s (Vector Int))
-> Vector Int -> ST s (Vector Int)
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Int) -> Vector Int
forall a. Unbox a => Int -> (Int -> a) -> Vector a
VU.generate (Vector (Vector Any) -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length Vector (Vector Any)
rows) (\Int
i -> (Int, Vector Any) -> Int
forall a b. (a, b) -> a
fst (Vector (Int, Vector Any)
sorted Vector (Int, Vector Any) -> Int -> (Int, Vector Any)
forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
VG.! Int
i))
produceOrderingFromRow :: [Bool] -> Row -> Row -> Ordering
produceOrderingFromRow :: [Bool] -> Vector Any -> Vector Any -> Ordering
produceOrderingFromRow [Bool]
mustFlips Vector Any
v1 Vector Any
v2 = (Ordering -> Ordering -> Ordering)
-> Ordering -> Vector Ordering -> Ordering
forall a b. (a -> b -> b) -> b -> Vector a -> b
V.foldr Ordering -> Ordering -> Ordering
forall a. Semigroup a => a -> a -> a
(<>) Ordering
forall a. Monoid a => a
mempty Vector Ordering
vZipped
where
vFlip :: Vector Bool
vFlip = [Bool] -> Vector Bool
forall a. [a] -> Vector a
V.fromList [Bool]
mustFlips
vZipped :: Vector Ordering
vZipped =
(Bool -> Any -> Any -> Ordering)
-> Vector Bool -> Vector Any -> Vector Any -> Vector Ordering
forall a b c d.
(a -> b -> c -> d) -> Vector a -> Vector b -> Vector c -> Vector d
V.zipWith3 (\Bool
b Any
e1 Any
e2 -> if Bool
b then Any -> Any -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Any
e1 Any
e2 else Any -> Any -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Any
e2 Any
e1) Vector Bool
vFlip Vector Any
v1 Vector Any
v2