{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Aggregation where
import Control.Exception (throw)
import Control.Monad
import Control.Monad.ST (ST, runST)
import qualified Data.List as L
import Data.Type.Equality (TestEquality (..), type (:~:) (Refl))
import Type.Reflection (typeRep)

import Data.Hashable
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Merge as VA
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM

import DataFrame.Errors
import DataFrame.Internal.Column (
    Column (..),
    TypedColumn (..),
    atIndicesStable,
    )
import DataFrame.Internal.DataFrame (DataFrame (..), GroupedDataFrame (..))
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Internal.Types
import DataFrame.Operations.Core
import DataFrame.Operations.Subset
{- | Group the rows of a dataframe by the values found in the given columns.

Every row is reduced to a single 'Int' hash over the selected columns (see
'computeRowHashes'). The @(rowIndex, hash)@ pairs are then merge-sorted by
hash in descending order so that rows with equal hashes become contiguous.

The resulting 'Grouped' value carries:

  * the original dataframe and the requested column names, unchanged;
  * the row indices in sorted order;
  * the offsets at which the hash changes (see 'changingPoints').

Throws 'ColumnNotFoundException' when any of @names@ is not a column of @df@.

NOTE: rows are compared by hash only, so distinct key values whose hashes
collide end up in the same group.
-}
groupBy ::
    [T.Text] ->
    DataFrame ->
    GroupedDataFrame
groupBy names df
    | any (`notElem` columnNames df) names =
        throw $
            ColumnNotFoundException
                (T.pack $ show $ names L.\\ columnNames df)
                "groupBy"
                (columnNames df)
    | otherwise =
        Grouped
            df
            names
            (VU.map fst valueIndices)
            (changingPoints valueIndices)
  where
    -- Positions of the requested columns, in the key order of the
    -- 'columnIndices' map (not the order in which @names@ was given).
    indicesToGroup :: [Int]
    indicesToGroup = M.elems $ M.filterWithKey (\k _ -> k `elem` names) (columnIndices df)

    -- (row index, row hash) pairs sorted by hash, largest hash first.
    -- Pairing with 'VU.indexed' guarantees that every row keeps its own
    -- index even when no grouping column contributes to the hash.
    valueIndices :: VU.Vector (Int, Int)
    valueIndices = runST $ do
        pairs <- VU.thaw (VU.indexed (computeRowHashes indicesToGroup df))
        VA.sortBy (\(_, h) (_, h') -> compare h' h) pairs
        VU.unsafeFreeze pairs
{- | Compute group boundaries from a vector of @(rowIndex, hash)@ pairs in
which equal hashes are already contiguous.

The result lists, in ascending order, the position at which each run of
equal hashes starts, followed by the total length of the input. A vector
holding @k@ runs therefore yields @k + 1@ offsets.

An empty input contains no runs, so only the terminal offset @0@ is
returned. This avoids reading the head of an empty vector and avoids
reporting a phantom group for callers that look up the first row of each
group.
-}
changingPoints :: VU.Vector (Int, Int) -> VU.Vector Int
changingPoints vs = case vs VU.!? 0 of
    Nothing -> VU.singleton 0
    Just (_, firstHash) ->
        -- Offsets are accumulated newest-first, so the total length is
        -- consed on the front before the final reversal.
        VU.reverse $
            VU.fromList $
                VU.length vs : fst (VU.ifoldl' findChangePoints ([0], firstHash) vs)
  where
    -- Record the current position whenever the hash differs from the one
    -- seen in the run so far.
    findChangePoints :: ([Int], Int) -> Int -> (Int, Int) -> ([Int], Int)
    findChangePoints (!offsets, !currentVal) index (_, !newVal)
        | currentVal == newVal = (offsets, currentVal)
        | otherwise = (index : offsets, newVal)
{- | Compute one 'Int' hash per row over the columns at the given positions.

The accumulator starts from a freshly allocated vector with one slot per
row. Each selected column is then folded into it in turn, using the slot's
current value as the salt for 'hashWithSalt'.

How a cell is fed to the hash depends on the column:

  * unboxed 'Int': hashed directly;
  * unboxed 'Double': scaled by 1000 and floored to an 'Int' first, so
    values that agree after that truncation hash identically;
  * other unboxed integral types: converted with 'fromIntegral';
  * other unboxed floating types: converted to 'Double', then treated as above;
  * boxed 'T.Text': hashed directly;
  * everything else (including optional columns): the hash of its 'show' output.

@indices@ are positions into @columns df@; an out-of-range position fails
in 'V.!'.
-}
computeRowHashes :: [Int] -> DataFrame -> VU.Vector Int
computeRowHashes indices df = runST $ do
    let n = fst (dimensions df)
    mv <- VUM.new n

    let selectedCols = map (columns df V.!) indices

    forM_ selectedCols $ \case
        UnboxedColumn (v :: VU.Vector a) ->
            case testEquality (typeRep @a) (typeRep @Int) of
                Just Refl ->
                    VU.imapM_ (\i (x :: Int) -> mixInto mv i x) v
                Nothing ->
                    case testEquality (typeRep @a) (typeRep @Double) of
                        Just Refl ->
                            VU.imapM_ (\i (d :: Double) -> mixInto mv i (doubleToInt d)) v
                        Nothing ->
                            case sIntegral @a of
                                STrue ->
                                    VU.imapM_
                                        (\i d -> mixInto mv i (fromIntegral @a @Int d))
                                        v
                                SFalse ->
                                    case sFloating @a of
                                        STrue ->
                                            VU.imapM_
                                                (\i d -> mixInto mv i (doubleToInt (realToFrac d :: Double)))
                                                v
                                        SFalse ->
                                            VU.imapM_
                                                (\i d -> mixInto mv i (hash (show d)))
                                                v
        BoxedColumn (v :: V.Vector a) ->
            case testEquality (typeRep @a) (typeRep @T.Text) of
                Just Refl ->
                    V.imapM_ (\i (t :: T.Text) -> mixInto mv i t) v
                Nothing ->
                    V.imapM_ (\i d -> mixInto mv i (hash (show d))) v
        OptionalColumn v ->
            V.imapM_ (\i d -> mixInto mv i (hash (show d))) v

    VU.unsafeFreeze mv
  where
    -- Fixed-point view of a Double: keeps three decimal places.
    doubleToInt :: Double -> Int
    doubleToInt = floor . (* 1000)

    -- Fold one cell into the running hash of row @i@, using the value
    -- already stored there as the salt.
    mixInto :: (Hashable a) => VUM.MVector s Int -> Int -> a -> ST s ()
    mixInto acc i x = do
        h <- VUM.unsafeRead acc i
        VUM.unsafeWrite acc i (hashWithSalt h x)
{- | Evaluate a list of named aggregation expressions against a grouped
dataframe.

The starting frame contains only the grouping columns, with one row per
group: the row found at each group's start offset (the final offset is the
terminal length and is dropped). Each @(name, expression)@ pair is then
interpreted with 'interpretAggregation' and its resulting column is
inserted under @name@; the pairs are threaded through 'fold'.

Throws the interpreter's 'DataFrameException' when interpretation fails,
and 'UnaggregatedException' when an expression does not reduce each group
to a single value.
-}
aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame
aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) =
    let
        -- One representative row per group, restricted to the key columns.
        df' =
            selectIndices
                (VU.map (valueIndices VU.!) (VU.init offsets))
                (select groupingColumns df)

        f :: NamedExpr -> DataFrame -> DataFrame
        f (name, Wrap (expr :: Expr a)) d =
            let
                value = case interpretAggregation @a gdf expr of
                    Left e -> throw e
                    Right (UnAggregated _) ->
                        throw $ UnaggregatedException (T.pack $ show expr)
                    Right (Aggregated (TColumn col)) -> col
             in
                insertColumn name value d
     in
        fold f aggs df'
{- | Build a dataframe from the rows at the given positions.

Every column is passed through 'atIndicesStable' with @xs@, and the stored
dimensions are updated to @(length xs, number of columns)@. All other
fields of @df@ are carried over unchanged.
-}
selectIndices :: VU.Vector Int -> DataFrame -> DataFrame
selectIndices xs df =
    df
        { columns = V.map (atIndicesStable xs) (columns df)
        , dataframeDimensions = (VU.length xs, V.length (columns df))
        }
{- | Keep one row per distinct combination of values across all columns.

The dataframe is grouped by every one of its columns with 'groupBy', and
the row at the start offset of each group is kept (the final offset is the
terminal length and is dropped). Rows appear in the order 'groupBy' sorted
the groups, not in their original order.
-}
distinct :: DataFrame -> DataFrame
distinct df = selectIndices (VU.map (indices VU.!) (VU.init os)) df
  where
    (Grouped _ _ indices os) = groupBy (columnNames df) df