{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Aggregation where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Radix as VA
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM

import Control.Exception (throw)
import Control.Monad
import Control.Monad.ST (runST)
import Data.Bits
import Data.Hashable
import Data.Type.Equality (TestEquality (..), type (:~:) (Refl))
import DataFrame.Errors
import DataFrame.Internal.Column (
    Column (..),
    TypedColumn (..),
    atIndicesStable,
 )
import DataFrame.Internal.DataFrame (DataFrame (..), GroupedDataFrame (..))
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Internal.Types
import DataFrame.Operations.Core
import DataFrame.Operations.Subset
import Type.Reflection (typeRep)

{- | O(k * n) groups the dataframe by the given columns, recording which rows
fall into each group so that the remaining columns can be reduced later.

The grouping is computed in three steps:

1. Every row gets one combined hash, built by folding its value from each
   grouping column into a running 'hashWithSalt'.
2. The @(row index, hash)@ pairs are radix-sorted by hash, which makes rows
   with the same key adjacent.
3. 'changingPoints' finds the offsets where the hash changes and
   'buildRowToGroup' inverts that into a row -> group lookup.

Throws 'ColumnNotFoundException' when any of @names@ is not a column of the
frame. A frame with no rows yields an empty grouping whose only offset is 0.
An empty @names@ list puts every row into a single group.

Caveats visible in this definition:

* Rows are compared by hash only, so two distinct keys whose hashes collide
  end up in the same group.
* Floating point keys are bucketed with @floor (x * 1000)@ before hashing, so
  values that land in the same 1/1000 bucket are treated as equal.
* Values without a dedicated case (non-'T.Text' boxed values, optional values,
  other unboxed types) are hashed through their 'show' output.
-}
groupBy ::
    [T.Text] ->
    DataFrame ->
    GroupedDataFrame
groupBy names df
    | any (`notElem` columnNames df) names =
        throw $
            ColumnNotFoundException
                (T.pack $ show $ names L.\\ columnNames df)
                "groupBy"
                (columnNames df)
    | nRows df == 0 =
        Grouped
            df
            names
            VU.empty
            (VU.fromList [0])
            VU.empty
    | otherwise =
        let !vis = VU.map fst valueIndices
            !os = changingPoints valueIndices
            !n = fst (dimensions df)
         in Grouped
                df
                names
                vis
                os
                (buildRowToGroup n vis os)
  where
    -- Positions (within 'columns') of the columns we are grouping on.
    indicesToGroup = M.elems $ M.filterWithKey (\k _ -> k `elem` names) (columnIndices df)

    -- Doubles are hashed through a fixed-point representation with three
    -- decimal places.
    doubleToInt :: Double -> Int
    doubleToInt = floor . (* 1000)

    -- Fold one more key value into the running hash stored for row @i@.
    -- Deliberately closed over nothing local (the buffer is a parameter) so
    -- that it stays polymorphic in the hashed type and can be shared by every
    -- column case below.
    mixInto vec i x = do
        (_, !h) <- VUM.unsafeRead vec i
        VUM.unsafeWrite vec i (i, hashWithSalt h x)

    -- (row index, combined key hash) pairs, sorted by hash.
    valueIndices = runST $ do
        let n = fst (dimensions df)
        mv <- VUM.new n

        -- Seed every slot with its own row index and a zero hash. Without
        -- this the index is only written while hashing a column, so grouping
        -- on no columns at all would leave every slot pointing at row 0.
        forM_ [0 .. n - 1] $ \i -> VUM.unsafeWrite mv i (i, 0 :: Int)

        let selectedCols = map (columns df V.!) indicesToGroup

        forM_ selectedCols $ \case
            UnboxedColumn (v :: VU.Vector a) ->
                case testEquality (typeRep @a) (typeRep @Int) of
                    Just Refl -> VU.imapM_ (mixInto mv) v
                    Nothing ->
                        case testEquality (typeRep @a) (typeRep @Double) of
                            Just Refl ->
                                VU.imapM_ (\i d -> mixInto mv i (doubleToInt d)) v
                            Nothing ->
                                case sIntegral @a of
                                    STrue ->
                                        VU.imapM_
                                            (\i d -> mixInto mv i (fromIntegral @a @Int d))
                                            v
                                    SFalse ->
                                        case sFloating @a of
                                            STrue ->
                                                VU.imapM_
                                                    (\i d -> mixInto mv i (doubleToInt (realToFrac d :: Double)))
                                                    v
                                            SFalse ->
                                                VU.imapM_
                                                    (\i d -> mixInto mv i (hash (show d)))
                                                    v
            BoxedColumn (v :: V.Vector a) ->
                case testEquality (typeRep @a) (typeRep @T.Text) of
                    Just Refl -> V.imapM_ (mixInto mv) v
                    Nothing -> V.imapM_ (\i d -> mixInto mv i (hash (show d))) v
            OptionalColumn v ->
                V.imapM_ (\i d -> mixInto mv i (hash (show d))) v

        -- Four passes over 16-bit digits cover a 64-bit hash.
        -- NOTE(review): the constants 63 and 4 * 16 assume a 64-bit Int/Word.
        let numPasses = 4
            bucketSize = 65536
            radixFunc k (_, !h) =
                -- Flipping the top bit maps the signed ordering of the hash
                -- onto the unsigned ordering of the digits being extracted.
                let h' = fromIntegral h `xor` (1 `unsafeShiftL` 63) :: Word
                    shiftBits = k * 16
                 in fromIntegral ((h' `unsafeShiftR` shiftBits) .&. 65535)
        VA.sortBy numPasses bucketSize radixFunc mv
        VU.unsafeFreeze mv

{- | Invert a grouping into a per-row lookup table.

@vis@ holds row indices ordered so that each group is contiguous, and @os@
holds the group boundaries: group @k@ owns positions @os[k] .. os[k + 1] - 1@
of @vis@. The result has length @n@ and maps row @r@ to the ordinal @k@ of the
group that contains it.

All indexing here is unchecked, so the caller must pass offsets that lie
within @vis@ and row indices below @n@. Rows never mentioned in @vis@ keep the
value 'VUM.new' initialised them with.
-}
buildRowToGroup :: Int -> VU.Vector Int -> VU.Vector Int -> VU.Vector Int
buildRowToGroup n vis os = VU.create $ do
    rowToGroup <- VUM.new n
    let groupCount = VU.length os - 1
        -- Stamp the group ordinal onto every row that belongs to the group.
        assign groupId =
            let lo = VU.unsafeIndex os groupId
                hi = VU.unsafeIndex os (groupId + 1)
             in mapM_
                    (\pos -> VUM.unsafeWrite rowToGroup (VU.unsafeIndex vis pos) groupId)
                    [lo .. hi - 1]
    mapM_ assign [0 .. groupCount - 1]
    return rowToGroup
{-# NOINLINE buildRowToGroup #-}

{- | Find the group boundaries in a vector of @(row index, hash)@ pairs that
has already been ordered so that equal hashes are adjacent.

The result lists, in ascending order, the position at which every run of
equal hashes starts, followed by the length of the input. Run @k@ therefore
spans positions @result[k] .. result[k + 1] - 1@. Only the second component
of each pair is inspected.

An empty input has no runs and yields the single offset @0@, matching what
'groupBy' produces for a frame without rows.
-}
changingPoints :: VU.Vector (Int, Int) -> VU.Vector Int
changingPoints vs
    | VU.null vs = VU.singleton 0
    | otherwise =
        -- Bound here rather than in the where clause: under the Strict
        -- extension a where binding may be forced before the guards run, and
        -- 'VU.head' must never see an empty vector.
        let seed = ([0], snd (VU.head vs))
            (starts, _) = VU.ifoldl' findChangePoints seed vs
         in -- Offsets are accumulated newest-first, so flip them at the end.
            VU.reverse (VU.fromList (VU.length vs : starts))
  where
    -- Record the current position whenever the hash differs from the one
    -- that opened the current run.
    findChangePoints (!offsets, !currentVal) index (_, !newVal)
        | currentVal == newVal = (offsets, currentVal)
        | otherwise = (index : offsets, newVal)

computeRowHashes :: [Int] -> DataFrame -> VU.Vector Int
computeRowHashes :: [Int] -> DataFrame -> Vector Int
computeRowHashes [Int]
indices DataFrame
df = (forall s. ST s (Vector Int)) -> Vector Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector Int)) -> Vector Int)
-> (forall s. ST s (Vector Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
    let n :: Int
n = (Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dimensions DataFrame
df)
    MVector s Int
mv <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
VUM.new Int
n

    let selectedCols :: [Column]
selectedCols = (Int -> Column) -> [Int] -> [Column]
forall a b. (a -> b) -> [a] -> [b]
map (DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.!) [Int]
indices

    [Column] -> (Column -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Column]
selectedCols ((Column -> ST s ()) -> ST s ()) -> (Column -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \case
        UnboxedColumn (Vector a
v :: VU.Vector a) ->
            case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
                Just a :~: Int
Refl ->
                    (Int -> Int -> ST s ()) -> Vector Int -> ST s ()
forall (m :: * -> *) a b.
(Monad m, Unbox a) =>
(Int -> a -> m b) -> Vector a -> m ()
VU.imapM_
                        ( \Int
i (Int
x :: Int) -> do
                            Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                            MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h Int
x)
                        )
                        Vector a
Vector Int
v
                Maybe (a :~: Int)
Nothing ->
                    case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
                        Just a :~: Double
Refl ->
                            (Int -> Double -> ST s ()) -> Vector Double -> ST s ()
forall (m :: * -> *) a b.
(Monad m, Unbox a) =>
(Int -> a -> m b) -> Vector a -> m ()
VU.imapM_
                                ( \Int
i (Double
d :: Double) -> do
                                    Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                                    MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h (Double -> Int
doubleToInt Double
d))
                                )
                                Vector a
Vector Double
v
                        Maybe (a :~: Double)
Nothing ->
                            case forall a. SBoolI (IntegralTypes a) => SBool (IntegralTypes a)
sIntegral @a of
                                SBool (IntegralTypes a)
STrue ->
                                    (Int -> a -> ST s ()) -> Vector a -> ST s ()
forall (m :: * -> *) a b.
(Monad m, Unbox a) =>
(Int -> a -> m b) -> Vector a -> m ()
VU.imapM_
                                        ( \Int
i a
d -> do
                                            let x :: Int
                                                x :: Int
x = forall a b. (Integral a, Num b) => a -> b
fromIntegral @a @Int a
d
                                            Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                                            MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h Int
x)
                                        )
                                        Vector a
v
                                SBool (IntegralTypes a)
SFalse ->
                                    case forall a. SBoolI (FloatingTypes a) => SBool (FloatingTypes a)
sFloating @a of
                                        SBool (FloatingTypes a)
STrue ->
                                            (Int -> a -> ST s ()) -> Vector a -> ST s ()
forall (m :: * -> *) a b.
(Monad m, Unbox a) =>
(Int -> a -> m b) -> Vector a -> m ()
VU.imapM_
                                                ( \Int
i a
d -> do
                                                    let x :: Int
                                                        x :: Int
x = Double -> Int
doubleToInt (a -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac a
d :: Double)
                                                    Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                                                    MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h Int
x)
                                                )
                                                Vector a
v
                                        SBool (FloatingTypes a)
SFalse ->
                                            (Int -> a -> ST s ()) -> Vector a -> ST s ()
forall (m :: * -> *) a b.
(Monad m, Unbox a) =>
(Int -> a -> m b) -> Vector a -> m ()
VU.imapM_
                                                ( \Int
i a
d -> do
                                                    let x :: Int
x = String -> Int
forall a. Hashable a => a -> Int
hash (a -> String
forall a. Show a => a -> String
show a
d)
                                                    Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                                                    MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h Int
x)
                                                )
                                                Vector a
v
        BoxedColumn (Vector a
v :: V.Vector a) ->
            case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
                Just a :~: Text
Refl ->
                    (Int -> Text -> ST s ()) -> Vector Text -> ST s ()
forall (m :: * -> *) a b.
Monad m =>
(Int -> a -> m b) -> Vector a -> m ()
V.imapM_
                        ( \Int
i (Text
t :: T.Text) -> do
                            Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                            MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Text -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h Text
t)
                        )
                        Vector a
Vector Text
v
                Maybe (a :~: Text)
Nothing ->
                    (Int -> a -> ST s ()) -> Vector a -> ST s ()
forall (m :: * -> *) a b.
Monad m =>
(Int -> a -> m b) -> Vector a -> m ()
V.imapM_
                        ( \Int
i a
d -> do
                            let x :: Int
x = String -> Int
forall a. Hashable a => a -> Int
hash (a -> String
forall a. Show a => a -> String
show a
d)
                            Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                            MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h Int
x)
                        )
                        Vector a
v
        OptionalColumn Vector (Maybe a)
v ->
            (Int -> Maybe a -> ST s ()) -> Vector (Maybe a) -> ST s ()
forall (m :: * -> *) a b.
Monad m =>
(Int -> a -> m b) -> Vector a -> m ()
V.imapM_
                ( \Int
i Maybe a
d -> do
                    let x :: Int
x = String -> Int
forall a. Hashable a => a -> Int
hash (Maybe a -> String
forall a. Show a => a -> String
show Maybe a
d)
                    Int
h <- MVector (PrimState (ST s)) Int -> Int -> ST s Int
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
VUM.unsafeRead MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i
                    MVector (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
VUM.unsafeWrite MVector s Int
MVector (PrimState (ST s)) Int
mv Int
i (Int -> Int -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
h Int
x)
                )
                Vector (Maybe a)
v

    MVector (PrimState (ST s)) Int -> ST s (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
VU.unsafeFreeze MVector s Int
MVector (PrimState (ST s)) Int
mv
  where
    doubleToInt :: Double -> Int
    doubleToInt :: Double -> Int
doubleToInt = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
floor (Double -> Int) -> (Double -> Double) -> Double -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
1000)

{- | Aggregate a grouped dataframe using the expressions given.

The result is built from the grouping columns, restricted to one row per
group, with one extra column inserted per named expression. Columns that are
neither grouping columns nor produced by an expression are dropped.

Throws the 'DataFrameException' returned by 'interpretAggregation' when an
expression cannot be interpreted, and 'UnaggregatedException' (carrying the
shown expression) when interpretation yields an 'UnAggregated' result.
-}
aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame
aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets _rowToGroup) =
    fold addAggregate aggs groupKeys
  where
    -- One row index per group: every offset except the last is looked up in
    -- 'valueIndices'.
    -- NOTE(review): this presumes 'offsets' holds the group start positions
    -- followed by one trailing entry -- confirm against 'groupBy'.
    representativeRows :: VU.Vector Int
    representativeRows = VU.map (valueIndices VU.!) (VU.init offsets)

    -- The grouping columns only, cut down to the representative rows.
    groupKeys :: DataFrame
    groupKeys = selectIndices representativeRows (select groupingColumns df)

    -- Interpret a single named expression against the groups and insert the
    -- resulting column under the expression's name.
    addAggregate :: NamedExpr -> DataFrame -> DataFrame
    addAggregate (name, UExpr (expr :: Expr a)) acc =
        case interpretAggregation @a gdf expr of
            Left err -> throw err
            Right (UnAggregated _) ->
                throw (UnaggregatedException (T.pack (show expr)))
            Right (Aggregated (TColumn col)) -> insertColumn name col acc

{- | Rebuild a dataframe from the rows at the given indices.

Every column is passed through 'atIndicesStable' with the same index vector.
The recorded dimensions become the number of indices by the number of columns.
-}
selectIndices :: VU.Vector Int -> DataFrame -> DataFrame
selectIndices xs df =
    df
        { dataframeDimensions = (VU.length xs, V.length (columns df))
        , columns = atIndicesStable xs <$> columns df
        }

{- | Remove duplicate rows from a dataframe.

The dataframe is grouped by all of its columns, and one row per resulting
group is kept.
-}
distinct :: DataFrame -> DataFrame
distinct df =
    case groupBy (columnNames df) df of
        Grouped _ _ rowOrder groupOffsets _ ->
            -- Every offset except the last is looked up in the grouped row
            -- order, giving one row index per group.
            selectIndices (VU.map (rowOrder VU.!) (VU.init groupOffsets)) df