{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Aggregation where
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Radix as VA
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import Control.Exception (throw)
import Control.Monad
import Control.Monad.ST (runST)
import Data.Bits
import Data.Hashable
import Data.Type.Equality (TestEquality (..), type (:~:) (Refl))
import DataFrame.Errors
import DataFrame.Internal.Column (
Column (..),
TypedColumn (..),
atIndicesStable,
)
import DataFrame.Internal.DataFrame (DataFrame (..), GroupedDataFrame (..))
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Internal.Types
import DataFrame.Operations.Core
import DataFrame.Operations.Subset
import Type.Reflection (typeRep)
-- | Partition the rows of a 'DataFrame' by the values found in the named columns.
--
-- Each row gets one 'Int' hash, built by folding the row's value in every
-- grouping column into a running salt with 'hashWithSalt'. The
-- @(rowIndex, hash)@ pairs are radix-sorted by hash, and a new group starts
-- wherever the hash changes between neighbours.
--
-- The resulting 'Grouped' value carries, in order:
--
--   * the original frame and the grouping column names,
--   * the row indices in hash-sorted order,
--   * the offsets into that ordering at which each group starts, followed by
--     the total row count (see 'changingPoints'),
--   * a row-index-to-group-number table (see 'buildRowToGroup').
--
-- Throws 'ColumnNotFoundException' when any requested name is not a column of
-- the frame. A frame with zero rows yields empty index vectors and the single
-- offset @[0]@.
--
-- NOTE(review): rows are compared by hash only, so two different keys whose
-- hashes collide would end up in the same group.
groupBy ::
    [T.Text] ->
    DataFrame ->
    GroupedDataFrame
groupBy names df
    | any (`notElem` columnNames df) names =
        throw $
            ColumnNotFoundException
                (T.pack $ show $ names L.\\ columnNames df)
                "groupBy"
                (columnNames df)
    | nRows df == 0 =
        Grouped
            df
            names
            VU.empty
            (VU.fromList [0])
            VU.empty
    | otherwise =
        let !n = fst (dimensions df)
            !valueIndices = sortedRowHashes n
            !vis = VU.map fst valueIndices
            !os = changingPoints valueIndices
         in Grouped
                df
                names
                vis
                os
                (buildRowToGroup n vis os)
  where
    -- Doubles are hashed through this bucketing, so values that fall into the
    -- same 1/1000 bucket contribute the same key.
    -- NOTE(review): confirm that this tolerance is intended.
    doubleToInt :: Double -> Int
    doubleToInt = floor . (* 1000)

    -- Replace slot @i@ of the accumulator with @(i, step h)@, where @h@ is the
    -- hash currently stored there.
    rehashRow mv i step = do
        (_, !h) <- VUM.unsafeRead mv i
        VUM.unsafeWrite mv i (i, step h)

    -- All @(rowIndex, hash)@ pairs, sorted by hash.
    --
    -- This is a function rather than a plain @where@ value on purpose: the
    -- module enables @Strict@, which forces @where@-bound values before the
    -- guards are tested. Taking an argument keeps the hashing and sorting out
    -- of the error and empty-frame paths.
    sortedRowHashes :: Int -> VU.Vector (Int, Int)
    sortedRowHashes n = runST $ do
        mv <- VUM.new n

        let indicesToGroup =
                M.elems $ M.filterWithKey (\k _ -> k `elem` names) (columnIndices df)
            selectedCols = map (columns df V.!) indicesToGroup

        forM_ selectedCols $ \case
            UnboxedColumn (v :: VU.Vector a) ->
                case testEquality (typeRep @a) (typeRep @Int) of
                    Just Refl ->
                        VU.imapM_ (\i x -> rehashRow mv i (`hashWithSalt` x)) v
                    Nothing ->
                        case testEquality (typeRep @a) (typeRep @Double) of
                            Just Refl ->
                                VU.imapM_
                                    (\i d -> rehashRow mv i (`hashWithSalt` doubleToInt d))
                                    v
                            Nothing ->
                                case sIntegral @a of
                                    STrue ->
                                        VU.imapM_
                                            ( \i d ->
                                                rehashRow mv i (`hashWithSalt` fromIntegral @a @Int d)
                                            )
                                            v
                                    SFalse ->
                                        case sFloating @a of
                                            STrue ->
                                                VU.imapM_
                                                    ( \i d ->
                                                        rehashRow
                                                            mv
                                                            i
                                                            (`hashWithSalt` doubleToInt (realToFrac d :: Double))
                                                    )
                                                    v
                                            SFalse ->
                                                -- No numeric view available: hash the rendered value.
                                                VU.imapM_
                                                    (\i d -> rehashRow mv i (`hashWithSalt` hash (show d)))
                                                    v
            BoxedColumn (v :: V.Vector a) ->
                case testEquality (typeRep @a) (typeRep @T.Text) of
                    Just Refl ->
                        V.imapM_ (\i t -> rehashRow mv i (`hashWithSalt` t)) v
                    Nothing ->
                        V.imapM_ (\i d -> rehashRow mv i (`hashWithSalt` hash (show d))) v
            OptionalColumn v ->
                V.imapM_ (\i d -> rehashRow mv i (`hashWithSalt` hash (show d))) v

        -- Four passes of 16 bits each cover a 64-bit key.
        let numPasses = 4
            bucketSize = 65536
            radixFunc k (_, !h) =
                -- Flipping the top bit makes the unsigned key order agree with
                -- the signed order of the hashes.
                let h' = fromIntegral h `xor` (1 `unsafeShiftL` 63) :: Word
                    shiftBits = k * 16
                 in fromIntegral ((h' `unsafeShiftR` shiftBits) .&. 65535)
        VA.sortBy numPasses bucketSize radixFunc mv
        VU.unsafeFreeze mv
-- | Build the table that maps a row index to the number of the group it is in.
--
-- Arguments are the table length @n@, the row indices in grouped order
-- (@vis@) and the group offsets (@os@). Group @g@ owns positions
-- @os[g] .. os[g + 1] - 1@ of @vis@, so the last entry of @os@ only closes the
-- final group.
--
-- Reads and writes use the unchecked vector primitives, so every offset must
-- be a valid position in @vis@ and every entry of @vis@ must be below @n@.
buildRowToGroup :: Int -> VU.Vector Int -> VU.Vector Int -> VU.Vector Int
buildRowToGroup n vis os = runST $ do
    rowToGroup <- VUM.new n
    let groupCount = VU.length os - 1
        -- Stamp group number @g@ on every row listed in its slice of @vis@.
        fillGroup g =
            forM_ [VU.unsafeIndex os g .. VU.unsafeIndex os (g + 1) - 1] $ \pos ->
                VUM.unsafeWrite rowToGroup (VU.unsafeIndex vis pos) g
    mapM_ fillGroup [0 .. groupCount - 1]
    VU.unsafeFreeze rowToGroup
{-# NOINLINE buildRowToGroup #-}
-- | Find the positions at which the second tuple component changes.
--
-- The input is expected to have equal second components sitting next to each
-- other (as after sorting by that component). The result lists, in ascending
-- order, position @0@, every position whose second component differs from its
-- predecessor's, and finally the length of the input. Consecutive entries
-- therefore delimit one run each.
--
-- An empty input has no runs and yields just @[0]@, which matches the offsets
-- 'groupBy' builds for a frame with no rows.
changingPoints :: VU.Vector (Int, Int) -> VU.Vector Int
changingPoints vs = case vs VU.!? 0 of
    Nothing -> VU.fromList [0]
    Just (_, firstVal) ->
        VU.reverse
            ( VU.fromList
                (VU.length vs : fst (VU.ifoldl' findChangePoints ([0], firstVal) vs))
            )
  where
    -- The accumulator holds the run starts found so far (newest first) and the
    -- value of the run currently being scanned.
    findChangePoints :: ([Int], Int) -> Int -> (Int, Int) -> ([Int], Int)
    findChangePoints (!offsets, !currentVal) index (_, !newVal)
        | currentVal == newVal = (offsets, currentVal)
        | otherwise = (index : offsets, newVal)
-- | Compute one 'Int' hash per row from the columns at the given positions.
--
-- The columns are visited in the order the positions are listed. For each one,
-- every row's running hash is used as the salt for 'hashWithSalt' on that
-- row's value:
--
--   * 'Int' and 'T.Text' values are hashed as they are,
--   * 'Double' and other floating values go through @doubleToInt@,
--   * other integral values are converted to 'Int',
--   * everything else is hashed via its 'show' rendering.
computeRowHashes :: [Int] -> DataFrame -> VU.Vector Int
computeRowHashes indices df = runST $ do
    let rowCount = fst (dimensions df)
    acc <- VUM.new rowCount

    forM_ indices $ \colIx ->
        case columns df V.! colIx of
            UnboxedColumn (v :: VU.Vector a) ->
                case testEquality (typeRep @a) (typeRep @Int) of
                    Just Refl ->
                        VU.imapM_ (\i (x :: Int) -> mixAt acc i (`hashWithSalt` x)) v
                    Nothing ->
                        case testEquality (typeRep @a) (typeRep @Double) of
                            Just Refl ->
                                VU.imapM_
                                    (\i (d :: Double) -> mixAt acc i (`hashWithSalt` doubleToInt d))
                                    v
                            Nothing ->
                                case sIntegral @a of
                                    STrue ->
                                        VU.imapM_
                                            (\i d -> mixAt acc i (`hashWithSalt` fromIntegral @a @Int d))
                                            v
                                    SFalse ->
                                        case sFloating @a of
                                            STrue ->
                                                VU.imapM_
                                                    ( \i d ->
                                                        mixAt
                                                            acc
                                                            i
                                                            (`hashWithSalt` doubleToInt (realToFrac d :: Double))
                                                    )
                                                    v
                                            SFalse ->
                                                VU.imapM_
                                                    (\i d -> mixAt acc i (`hashWithSalt` hash (show d)))
                                                    v
            BoxedColumn (v :: V.Vector a) ->
                case testEquality (typeRep @a) (typeRep @T.Text) of
                    Just Refl ->
                        V.imapM_ (\i (t :: T.Text) -> mixAt acc i (`hashWithSalt` t)) v
                    Nothing ->
                        V.imapM_ (\i d -> mixAt acc i (`hashWithSalt` hash (show d))) v
            OptionalColumn v ->
                V.imapM_ (\i d -> mixAt acc i (`hashWithSalt` hash (show d))) v

    VU.unsafeFreeze acc
  where
    -- Doubles are hashed after being bucketed at 1/1000 resolution.
    doubleToInt :: Double -> Int
    doubleToInt = floor . (* 1000)

    -- Overwrite slot @i@ with @step@ applied to the hash stored there.
    mixAt mv i step = VUM.unsafeRead mv i >>= VUM.unsafeWrite mv i . step
-- | Reduce a grouped frame to one row per group.
--
-- The starting frame holds only the grouping columns, restricted to the row
-- found at the first position of each group. Every named expression is then
-- evaluated with 'interpretAggregation' and added under its name with
-- 'insertColumn'.
--
-- Throws the interpreter's error when evaluation fails, and
-- 'UnaggregatedException' (carrying the rendered expression) when an
-- expression does not reduce to an aggregated column.
aggregate :: [NamedExpr] -> GroupedDataFrame -> DataFrame
aggregate aggs gdf@(Grouped df groupingColumns valueIndices offsets _rowToGroup) =
    fold addAggregate aggs groupKeys
  where
    -- One representative row per group: the last offset only marks the end of
    -- the final group, so it is dropped.
    groupKeys =
        selectIndices
            (VU.map (valueIndices VU.!) (VU.init offsets))
            (select groupingColumns df)

    addAggregate :: NamedExpr -> DataFrame -> DataFrame
    addAggregate (name, UExpr (expr :: Expr a)) acc =
        let column = case interpretAggregation @a gdf expr of
                Right (Aggregated (TColumn col)) -> col
                Right (UnAggregated _) ->
                    throw $ UnaggregatedException (T.pack $ show expr)
                Left err -> throw err
         in insertColumn name column acc
-- | Rebuild a frame from the rows at the given positions.
--
-- Every column is passed through 'atIndicesStable' with the same index vector.
-- The recorded dimensions become the number of indices and the number of
-- columns.
-- NOTE(review): presumably 'atIndicesStable' keeps rows in the order the
-- indices are listed; confirm against its definition.
selectIndices :: VU.Vector Int -> DataFrame -> DataFrame
selectIndices xs df =
    df
        { dataframeDimensions = (VU.length xs, V.length (columns df))
        , columns = atIndicesStable xs <$> columns df
        }
-- | Drop duplicate rows.
--
-- The frame is grouped by all of its columns with 'groupBy', and only the row
-- at the first position of each group is kept.
distinct :: DataFrame -> DataFrame
distinct df =
    case groupBy (columnNames df) df of
        Grouped _ _ rowOrder groupStarts _ ->
            -- The last offset only marks the end of the final group; skip it.
            selectIndices (VU.map (rowOrder VU.!) (VU.init groupStarts)) df