{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Aggregation where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Merge as VA
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM

import Control.Exception (throw)
import Control.Monad
import Control.Monad.ST (runST)
import Data.Hashable
import Data.Type.Equality (TestEquality (..), type (:~:) (Refl))
import DataFrame.Errors
import DataFrame.Internal.Column (
    Column (..),
    TypedColumn (..),
    atIndicesStable,
 )
import DataFrame.Internal.DataFrame (DataFrame (..), GroupedDataFrame (..))
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Internal.Types
import DataFrame.Operations.Core
import DataFrame.Operations.Subset
import Type.Reflection (typeRep)

{- | Groups the rows of a dataframe by the values found in the given columns.

Each row is reduced to a single 'Int' by folding 'hashWithSalt' over the
values of its grouping columns (O(k * n) for k grouping columns and n rows).
The @(row index, hash)@ pairs are then merge-sorted by hash so that rows with
the same key end up adjacent, and 'changingPoints' records where each run of
equal hashes starts.

The resulting 'Grouped' value carries the original dataframe, the grouping
column names, the row indices in sorted order and the group offsets.

Groups are identified by hash only, so two different keys whose hashes collide
are placed in the same group.

Throws 'ColumnNotFoundException' when any requested name is not a column of
the dataframe.
-}
groupBy ::
    [T.Text] ->
    DataFrame ->
    GroupedDataFrame
groupBy names df
    | any (`notElem` columnNames df) names =
        throw $
            ColumnNotFoundException
                (T.pack $ show $ names L.\\ columnNames df)
                "groupBy"
                (columnNames df)
    | otherwise =
        Grouped
            df
            names
            (VU.map fst valueIndices)
            (changingPoints valueIndices)
  where
    -- Positions of the grouping columns inside @columns df@.
    indicesToGroup = M.elems $ M.filterWithKey (\k _ -> k `elem` names) (columnIndices df)

    -- Floating point keys are hashed from the value itself rather than from
    -- a rounded integer, so values that differ by less than 1/1000 (or that
    -- do not fit in an Int) no longer share a group. Negative zero is folded
    -- into zero because the two compare equal.
    canonicalDouble :: Double -> Double
    canonicalDouble d = if d == 0 then 0 else d

    -- Mixes one more column value into the running hash stored for row @i@
    -- and (re)records the row index next to it.
    mixInto acc i x = do
        (_, !h) <- VUM.unsafeRead acc i
        VUM.unsafeWrite acc i (i, hashWithSalt h x)

    valueIndices = runST $ do
        let n = fst (dimensions df)
        mv <- VUM.new n

        let selectedCols = map (columns df V.!) indicesToGroup

        forM_ selectedCols $ \case
            UnboxedColumn (v :: VU.Vector a) ->
                case testEquality (typeRep @a) (typeRep @Int) of
                    Just Refl -> VU.imapM_ (mixInto mv) v
                    Nothing ->
                        case testEquality (typeRep @a) (typeRep @Double) of
                            Just Refl ->
                                VU.imapM_ (\i d -> mixInto mv i (canonicalDouble d)) v
                            Nothing ->
                                case sIntegral @a of
                                    STrue ->
                                        VU.imapM_
                                            (\i d -> mixInto mv i (fromIntegral @a @Int d))
                                            v
                                    SFalse ->
                                        case sFloating @a of
                                            STrue ->
                                                VU.imapM_
                                                    (\i d -> mixInto mv i (canonicalDouble (realToFrac d)))
                                                    v
                                            -- No cheaper key is known for this
                                            -- type: hash its textual rendering.
                                            SFalse ->
                                                VU.imapM_
                                                    (\i d -> mixInto mv i (hash (show d)))
                                                    v
            BoxedColumn (v :: V.Vector a) ->
                case testEquality (typeRep @a) (typeRep @T.Text) of
                    Just Refl -> V.imapM_ (mixInto mv) v
                    Nothing -> V.imapM_ (\i d -> mixInto mv i (hash (show d))) v
            OptionalColumn v ->
                V.imapM_ (\i d -> mixInto mv i (hash (show d))) v

        -- Order by hash only (descending); the comparator ignores the row
        -- index stored in the first component.
        VA.sortBy (\(_, b) (_, b') -> compare b' b) mv
        VU.unsafeFreeze mv

{- | Finds the group boundaries in a vector of @(row index, hash)@ pairs in
which equal hashes are adjacent.

The result lists, in ascending order, the position at which every run of
equal hashes starts, followed by the length of the input. A vector holding
@g@ runs therefore yields @g + 1@ offsets, and run @j@ occupies positions
@offsets ! j@ up to (but not including) @offsets ! (j + 1)@.

An empty input contains no runs and yields the single offset @0@, so that
dropping the final offset leaves no group starts to look up.
-}
changingPoints :: VU.Vector (Int, Int) -> VU.Vector Int
changingPoints vs = case vs VU.!? 0 of
    Nothing -> VU.singleton 0
    Just (_, firstKey) ->
        -- Offsets are consed in reverse order during the fold, with the total
        -- length added last, then flipped once at the end.
        VU.reverse
            (VU.fromList (VU.length vs : fst (VU.ifoldl' step ([0], firstKey) vs)))
  where
    -- Accumulator: run starts seen so far (newest first) and the hash of
    -- the run currently being scanned.
    step (!starts, !current) ix (_, !key)
        | current == key = (starts, current)
        | otherwise = (ix : starts, key)

{- | Computes one hash per row from the columns at the given positions.

The hash of a row starts from the zero-initialised slot created by 'VUM.new'
and is updated with 'hashWithSalt' once per selected column, in the order the
positions are given. The result has one entry per row of the dataframe.

Positions are looked up with 'V.!', so an out-of-range position is an error.
-}
computeRowHashes :: [Int] -> DataFrame -> VU.Vector Int
computeRowHashes indices df = runST $ do
    let n = fst (dimensions df)
    mv <- VUM.new n

    let selectedCols = map (columns df V.!) indices

    forM_ selectedCols $ \case
        UnboxedColumn (v :: VU.Vector a) ->
            case testEquality (typeRep @a) (typeRep @Int) of
                Just Refl -> VU.imapM_ (mixInto mv) v
                Nothing ->
                    case testEquality (typeRep @a) (typeRep @Double) of
                        Just Refl ->
                            VU.imapM_ (\i d -> mixInto mv i (canonicalDouble d)) v
                        Nothing ->
                            case sIntegral @a of
                                STrue ->
                                    VU.imapM_
                                        (\i d -> mixInto mv i (fromIntegral @a @Int d))
                                        v
                                SFalse ->
                                    case sFloating @a of
                                        STrue ->
                                            VU.imapM_
                                                (\i d -> mixInto mv i (canonicalDouble (realToFrac d)))
                                                v
                                        -- No cheaper key is known for this
                                        -- type: hash its textual rendering.
                                        SFalse ->
                                            VU.imapM_
                                                (\i d -> mixInto mv i (hash (show d)))
                                                v
        BoxedColumn (v :: V.Vector a) ->
            case testEquality (typeRep @a) (typeRep @T.Text) of
                Just Refl -> V.imapM_ (mixInto mv) v
                Nothing -> V.imapM_ (\i d -> mixInto mv i (hash (show d))) v
        OptionalColumn v ->
            V.imapM_ (\i d -> mixInto mv i (hash (show d))) v

    VU.unsafeFreeze mv
  where
    -- Floating point values are hashed from the value itself rather than
    -- from a rounded integer, so values that differ by less than 1/1000 (or
    -- that do not fit in an Int) no longer hash alike. Negative zero is
    -- folded into zero because the two compare equal.
    canonicalDouble :: Double -> Double
    canonicalDouble d = if d == 0 then 0 else d

    -- Mixes one more column value into the running hash stored for row @i@.
    mixInto acc i x = do
        h <- VUM.unsafeRead acc i
        VUM.unsafeWrite acc i (hashWithSalt h x)

{- | Reduces a grouped dataframe to one row per group.

The result starts from the grouping columns only, restricted to the row found
at the start offset of every group, and gains one column per named
expression. Columns that are neither grouping columns nor produced by an
expression do not appear in the result.

Throws the interpreter's exception when an expression cannot be evaluated, and
'UnaggregatedException' when an expression does not reduce each group to a
single value.
-}
aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame
aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets) =
    fold addAggregate aggs keyFrame
  where
    -- The last offset is the row count, not a group start, hence 'VU.init'.
    groupStarts = VU.map (valueIndices VU.!) (VU.init offsets)

    -- Grouping columns only, one row per group.
    keyFrame = selectIndices groupStarts (select groupingColumns df)

    -- Evaluates a single named expression and appends it as a column.
    addAggregate (name, Wrap (expr :: Expr a)) acc =
        case interpretAggregation @a gdf expr of
            Left e -> throw e
            Right (UnAggregated _) ->
                throw $ UnaggregatedException (T.pack $ show expr)
            Right (Aggregated (TColumn col)) -> insertColumn name col acc

{- | Restricts every column of a dataframe to the rows at the given positions
(via 'atIndicesStable') and updates the stored dimensions to match: the row
count becomes the number of positions, the column count is unchanged.
-}
selectIndices :: VU.Vector Int -> DataFrame -> DataFrame
selectIndices rowIxs frame =
    frame
        { columns = V.map (atIndicesStable rowIxs) cols
        , dataframeDimensions = (VU.length rowIxs, V.length cols)
        }
  where
    cols = columns frame

{- | Keeps a single row for every distinct combination of values.

The dataframe is grouped by all of its columns and the row at the start
offset of each group is retained.
-}
distinct :: DataFrame -> DataFrame
distinct df =
    case groupBy (columnNames df) df of
        Grouped _ _ rowOrder groupOffsets ->
            -- The last offset is the row count, not a group start.
            selectIndices (VU.map (rowOrder VU.!) (VU.init groupOffsets)) df