{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Statistics where
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import Prelude as P
import Control.Exception (throw)
import Data.Function ((&))
import Data.Maybe (fromMaybe, isJust)
import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl))
import DataFrame.Errors (DataFrameException (..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (
DataFrame (..),
empty,
getColumn,
)
import DataFrame.Internal.Expression
import DataFrame.Internal.Interpreter
import DataFrame.Internal.Row (showValue, toAny)
import DataFrame.Internal.Statistics
import DataFrame.Internal.Types
import DataFrame.Operations.Core
import DataFrame.Operations.Subset (filterJust)
import DataFrame.Operations.Transformations (impute)
import Text.Printf (printf)
import Type.Reflection (typeRep)
-- | Build a frequency table for the values of an expression.
--
-- The result starts with a @"Statistic"@ column holding the row labels
-- @"Count"@ and @"Percentage (%)"@. Every @(value, count)@ pair returned by
-- 'valueCounts' then adds one column, named with 'showValue', whose two cells
-- are the count and that count's share of the total formatted by 'toPct2dp'.
--
-- Throws the 'DataFrameException' reported by 'columnAsVector' when the
-- expression cannot be materialised against the frame.
frequencies :: forall a. (Columnable a) => Expr a -> DataFrame -> DataFrame
frequencies expr df =
    case columnAsVector expr df of
        Left err -> throw err
        -- The vector itself is not used; the lookup only validates @expr@.
        Right _ -> L.foldl' addValueColumn header counts
  where
    counts :: [(a, Int)]
    counts = valueCounts expr df

    -- Sum of all counts, computed once rather than once per distinct value.
    total :: Double
    total = fromIntegral (P.sum (map snd counts))

    -- Frame holding only the row labels; value columns are appended to it.
    header :: DataFrame
    header =
        empty
            & insertVector "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])

    -- Append the column for one distinct value: its count and its percentage.
    addValueColumn :: DataFrame -> (a, Int) -> DataFrame
    addValueColumn acc (value, k) =
        insertVector
            (showValue @a value)
            (V.fromList [toAny k, toAny (toPct2dp (fromIntegral k / total))])
            acc
-- | Mean of a numeric column or expression, returned as a 'Double'.
--
-- A bare column reference is read through '_getColumnAsDouble' and handed to
-- 'meanDouble''. Any other expression is evaluated with 'interpret', converted
-- with 'toUnboxedVector' and handed to 'mean''.
--
-- Throws the 'DataFrameException' produced by 'interpret' or
-- 'toUnboxedVector', and calls 'error' when '_getColumnAsDouble' yields
-- 'Nothing' for a named column.
mean ::
    forall a. (Columnable a, Real a, VU.Unbox a) => Expr a -> DataFrame -> Double
mean (Col name) df =
    maybe
        (error "[INTERNAL ERROR] Column is non-numeric")
        meanDouble'
        (_getColumnAsDouble name df)
mean expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) -> either throw mean' (toUnboxedVector @a col)
-- | Mean of an optional numeric column or expression, returned as a 'Double'.
--
-- The @Maybe@ values are first passed through 'optionalToDoubleVector' (which
-- keeps only the present values) and the result is handed to 'mean''.
--
-- Throws the 'DataFrameException' produced by 'columnAsVector', 'interpret' or
-- 'toVector'.
meanMaybe ::
    forall a. (Columnable a, Real a) => Expr (Maybe a) -> DataFrame -> Double
meanMaybe (Col name) df =
    either throw (mean' . optionalToDoubleVector) $
        columnAsVector (Col @(Maybe a) name) df
meanMaybe expr df = case interpret @(Maybe a) df expr of
    Left e -> throw e
    Right (TColumn col) ->
        either throw (mean' . optionalToDoubleVector) (toVector @(Maybe a) col)
-- | Median of a numeric column or expression, returned as a 'Double'.
--
-- A bare column reference is fetched with 'columnAsUnboxedVector'; any other
-- expression is evaluated with 'interpret' and converted with
-- 'toUnboxedVector'. Either way the values are handed to 'median''.
--
-- Throws the 'DataFrameException' produced by any of those steps.
median ::
    forall a. (Columnable a, Real a, VU.Unbox a) => Expr a -> DataFrame -> Double
median (Col name) df =
    either throw median' (columnAsUnboxedVector (Col @a name) df)
median expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) -> either throw median' (toUnboxedVector @a col)
-- | Median of an optional numeric column or expression, returned as a
-- 'Double'.
--
-- The @Maybe@ values are first passed through 'optionalToDoubleVector' (which
-- keeps only the present values) and the result is handed to 'median''.
--
-- Throws the 'DataFrameException' produced by 'columnAsVector', 'interpret' or
-- 'toVector'.
medianMaybe ::
    forall a. (Columnable a, Real a) => Expr (Maybe a) -> DataFrame -> Double
medianMaybe (Col name) df =
    either throw (median' . optionalToDoubleVector) $
        columnAsVector (Col @(Maybe a) name) df
medianMaybe expr df = case interpret @(Maybe a) df expr of
    Left e -> throw e
    Right (TColumn col) ->
        either throw (median' . optionalToDoubleVector) (toVector @(Maybe a) col)
-- | The @n@-th percentile of a numeric column or expression, as computed by
-- 'percentile''.
--
-- A bare column reference is fetched with 'columnAsUnboxedVector'; any other
-- expression is evaluated with 'interpret' and converted with
-- 'toUnboxedVector'.
--
-- Throws the 'DataFrameException' produced by any of those steps.
percentile ::
    forall a.
    (Columnable a, Real a, VU.Unbox a) => Int -> Expr a -> DataFrame -> Double
percentile n (Col name) df =
    either throw (percentile' n) (columnAsUnboxedVector (Col @a name) df)
percentile n expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) -> either throw (percentile' n) (toUnboxedVector @a col)
-- | The @n@-th percentile of any orderable column or expression, as computed
-- by 'percentileOrd''. The result has the column's own element type.
--
-- A bare column reference is fetched with 'columnAsVector'; any other
-- expression is evaluated with 'interpret' and converted with 'toVector'.
--
-- Throws the 'DataFrameException' produced by any of those steps.
genericPercentile ::
    forall a.
    (Columnable a, Ord a) => Int -> Expr a -> DataFrame -> a
genericPercentile n (Col name) df =
    either throw (percentileOrd' n) (columnAsVector (Col @a name) df)
genericPercentile n expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) -> either throw (percentileOrd' n) (toVector @a col)
-- | Standard deviation of a numeric column or expression: the square root of
-- whatever 'variance'' returns for its values.
--
-- A bare column reference is fetched with 'columnAsUnboxedVector'; any other
-- expression is evaluated with 'interpret' and converted with
-- 'toUnboxedVector'.
--
-- Throws the 'DataFrameException' produced by any of those steps.
standardDeviation ::
    forall a. (Columnable a, Real a, VU.Unbox a) => Expr a -> DataFrame -> Double
standardDeviation (Col name) df =
    either throw (sqrt . variance') (columnAsUnboxedVector (Col @a name) df)
standardDeviation expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) ->
        either throw (sqrt . variance') (toUnboxedVector @a col)
-- | Skewness of a numeric column or expression, as computed by 'skewness''.
--
-- A bare column reference is fetched with 'columnAsUnboxedVector'; any other
-- expression is evaluated with 'interpret' and converted with
-- 'toUnboxedVector'.
--
-- Throws the 'DataFrameException' produced by any of those steps.
skewness ::
    forall a. (Columnable a, Real a, VU.Unbox a) => Expr a -> DataFrame -> Double
skewness (Col name) df =
    either throw skewness' (columnAsUnboxedVector (Col @a name) df)
skewness expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) -> either throw skewness' (toUnboxedVector @a col)
-- | Variance of a numeric column or expression, returned as a 'Double'.
--
-- A bare column reference is read through '_getColumnAsDouble' and handed to
-- 'varianceDouble''. Any other expression is evaluated with 'interpret',
-- converted with 'toUnboxedVector' and handed to 'variance''.
--
-- Throws the 'DataFrameException' produced by 'interpret' or
-- 'toUnboxedVector', and calls 'error' when '_getColumnAsDouble' yields
-- 'Nothing' for a named column.
variance ::
    forall a. (Columnable a, Real a, VU.Unbox a) => Expr a -> DataFrame -> Double
variance (Col name) df =
    maybe
        (error "[INTERNAL ERROR] Column is non-numeric")
        varianceDouble'
        (_getColumnAsDouble name df)
variance expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) -> either throw variance' (toUnboxedVector @a col)
-- | Inter-quartile range of a numeric column or expression, as computed by
-- 'interQuartileRange''.
--
-- A bare column reference is fetched with 'columnAsUnboxedVector'; any other
-- expression is evaluated with 'interpret' and converted with
-- 'toUnboxedVector'.
--
-- Throws the 'DataFrameException' produced by any of those steps.
interQuartileRange ::
    forall a. (Columnable a, Real a, VU.Unbox a) => Expr a -> DataFrame -> Double
interQuartileRange (Col name) df =
    either throw interQuartileRange' (columnAsUnboxedVector (Col @a name) df)
interQuartileRange expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) ->
        either throw interQuartileRange' (toUnboxedVector @a col)
-- | Correlation between two named columns, as computed by 'correlation''.
--
-- Both columns are read through '_getColumnAsDouble'. The result is 'Nothing'
-- when either lookup yields 'Nothing' or when 'correlation'' itself does.
-- The second column is only looked up once the first has succeeded.
correlation :: T.Text -> T.Text -> DataFrame -> Maybe Double
correlation first second df =
    _getColumnAsDouble first df >>= \xs ->
        _getColumnAsDouble second df >>= correlation' xs
-- | Read a named column as an unboxed vector of 'Double'.
--
-- * An unboxed column whose element type is already 'Double' is returned
--   unchanged.
-- * An unboxed column for which 'sIntegral' is 'STrue' is converted with
--   'fromIntegral'.
-- * An unboxed column for which 'sFloating' is 'STrue' is converted with
--   'realToFrac'.
-- * Any other unboxed element type, and any other column constructor, yields
--   'Nothing'.
--
-- Throws 'ColumnNotFoundException' when 'getColumn' finds no such column.
_getColumnAsDouble :: T.Text -> DataFrame -> Maybe (VU.Vector Double)
_getColumnAsDouble name df = case getColumn name df of
    Nothing ->
        throw
            (ColumnNotFoundException name "_getColumnAsDouble" (M.keys (columnIndices df)))
    Just (UnboxedColumn (raw :: VU.Vector b)) ->
        case testEquality (typeRep @b) (typeRep @Double) of
            -- Already doubles: no conversion needed.
            Just Refl -> Just raw
            Nothing -> case sIntegral @b of
                STrue -> Just (VU.map fromIntegral raw)
                SFalse -> case sFloating @b of
                    STrue -> Just (VU.map realToFrac raw)
                    SFalse -> Nothing
    _ -> Nothing
{-# INLINE _getColumnAsDouble #-}
-- | Drop the 'Nothing' entries of an optional vector and convert the remaining
-- values to 'Double' with 'realToFrac'.
--
-- The surviving values keep their original relative order: a right fold is
-- used because a left fold that conses onto its accumulator would reverse
-- them.
optionalToDoubleVector :: (Real a) => V.Vector (Maybe a) -> VU.Vector Double
optionalToDoubleVector = VU.fromList . V.foldr keepPresent []
  where
    -- Prepend a present value (as a Double); skip a missing one.
    keepPresent (Just x) acc = realToFrac x : acc
    keepPresent Nothing acc = acc
-- | Sum of a numeric column or expression, in the column's own element type.
--
-- For a bare column reference the stored column is inspected directly:
--
-- * unboxed or boxed column whose element type equals @a@: all values are
--   summed;
-- * optional column whose inner type equals @a@: values are summed with each
--   'Nothing' counted as @0@;
-- * element type different from @a@: the result is @0@ (no error is raised).
--
-- Any other expression is evaluated with 'interpret', converted with
-- 'toVector' and summed.
--
-- Throws 'ColumnNotFoundException' when the named column does not exist, and
-- the 'DataFrameException' produced by 'interpret' or 'toVector'.
sum ::
    forall a. (Columnable a, Num a) => Expr a -> DataFrame -> a
sum (Col name) df = case getColumn name df of
    Nothing ->
        throw (ColumnNotFoundException name "sum" (M.keys (columnIndices df)))
    Just (UnboxedColumn (xs :: VU.Vector b))
        | Just Refl <- testEquality (typeRep @b) (typeRep @a) -> VG.sum xs
        | otherwise -> 0
    Just (BoxedColumn (xs :: V.Vector b))
        | Just Refl <- testEquality (typeRep @b) (typeRep @a) -> VG.sum xs
        | otherwise -> 0
    Just (OptionalColumn (xs :: V.Vector (Maybe b)))
        | Just Refl <- testEquality (typeRep @b) (typeRep @a) ->
            VG.sum (VG.map (fromMaybe 0) xs)
        | otherwise -> 0
sum expr df = case interpret df expr of
    Left e -> throw e
    Right (TColumn col) -> either throw VG.sum (toVector @a @V.Vector col)
-- | Fill the missing entries of an optional column with a value derived from
-- the column itself.
--
-- For a bare column reference, the transformation @f@ is applied to the
-- non-optional view of that column and evaluated with 'interpret' on the frame
-- returned by 'filterJust'. The first element of the result ('headColumn') is
-- passed to 'impute' as the fill value, provided every element of the result
-- equals it.
--
-- Throws the 'DataFrameException' produced by 'interpret' or 'headColumn', and
-- calls 'error' when the evaluated column holds more than one distinct value.
-- For any expression other than a bare column reference the frame is returned
-- unchanged.
imputeWith ::
    forall b.
    (Columnable b) =>
    (Expr b -> Expr b) ->
    Expr (Maybe b) ->
    DataFrame ->
    DataFrame
imputeWith f col@(Col columnName) df =
    case interpret @b (filterJust columnName df) (f (Col @b columnName)) of
        Left e -> throw e
        Right (TColumn value) -> case headColumn @b value of
            Left e -> throw e
            Right h
                -- The expression must collapse to a single repeated value.
                | all (== h) (toList @b value) -> impute col h df
                | otherwise -> error "Impute expression returned more than one value"
imputeWith _ _ df = df
-- | Apply a scalar statistic to a named column read as doubles.
--
-- The column is read with '_getColumnAsDouble' from the frame returned by
-- 'filterJust' for that column. The result is 'Nothing' when the column
-- cannot be read as doubles or when the statistic evaluates to NaN.
applyStatistic ::
    (VU.Vector Double -> Double) -> T.Text -> DataFrame -> Maybe Double
applyStatistic f name df =
    _getColumnAsDouble name (filterJust name df) >>= rejectNaN . f
  where
    -- A NaN result is reported as "no value".
    rejectNaN :: Double -> Maybe Double
    rejectNaN res
        | isNaN res = Nothing
        | otherwise = Just res
{-# INLINE applyStatistic #-}
-- | Apply a vector-valued statistic to a named column read as doubles.
--
-- The column is read with '_getColumnAsDouble' from the frame returned by
-- 'filterJust' for that column. The result is 'Nothing' when the column
-- cannot be read as doubles.
applyStatistics ::
    (VU.Vector Double -> VU.Vector Double) ->
    T.Text ->
    DataFrame ->
    Maybe (VU.Vector Double)
applyStatistics f name df =
    f <$> _getColumnAsDouble name (filterJust name df)
-- | Build a table of descriptive statistics for a data frame.
--
-- The result starts with a @Statistic@ column holding the row labels
-- (Count, Mean, Minimum, 25%, Median, 75%, Max, StdDev, IQR, Skewness).
-- For every column name of the input, a column of the same name holding the
-- ten statistics (rounded to two decimal places with 'roundTo') is added,
-- but only when /all/ ten statistics could be computed. Columns for which
-- any statistic is 'Nothing' are left out of the summary (presumably the
-- non-numeric ones -- this depends on '_getColumnAsDouble').
summarize :: DataFrame -> DataFrame
summarize df =
    fold
        columnStats
        (columnNames df)
        ( fromNamedColumns
            [
                ( "Statistic"
                , fromList
                    [ "Count" :: T.Text
                    , "Mean"
                    , "Minimum"
                    , "25%"
                    , "Median"
                    , "75%"
                    , "Max"
                    , "StdDev"
                    , "IQR"
                    , "Skewness"
                    ]
                )
            ]
        )
  where
    -- Add the statistics column for @name@ to the accumulated summary @d@,
    -- or return @d@ untouched when any statistic is missing.
    columnStats :: T.Text -> DataFrame -> DataFrame
    columnStats name d =
        -- 'sequence' is 'Just' exactly when every statistic is 'Just', so the
        -- statistics are computed once and no placeholder default is needed.
        case sequence (stats name) of
            Just values ->
                insertUnboxedVector name (VU.fromList (map (roundTo 2) values)) d
            Nothing -> d

    -- The statistics for column @name@, in the same order as the labels of
    -- the @Statistic@ column above.
    stats :: T.Text -> [Maybe Double]
    stats name =
        let
            -- Count is taken from the column as stored in @df@, before the
            -- 'filterJust' step applied for the other statistics.
            count = fromIntegral . numElements <$> getColumn name df
            -- Five cut points (indices 0..4 of 4 parts): min, Q1, median, Q3, max.
            quantiles =
                applyStatistics (quantiles' (VU.fromList [0, 1, 2, 3, 4]) 4) name df
            -- Safe indexing: a short quantile vector yields 'Nothing' (and so
            -- the column is skipped) instead of an out-of-bounds exception.
            quantileAt i = quantiles >>= (VG.!? i)
            quartile1 = quantileAt 1
            quartile3 = quantileAt 3
            iqr = (-) <$> quartile3 <*> quartile1
            -- Filter and convert the column once; mean, standard deviation
            -- and skewness all share this vector.
            values = _getColumnAsDouble name (filterJust name df)
         in
            [ count
            , mean' <$> values
            , quantileAt 0
            , quartile1
            , quantileAt 2
            , quartile3
            , quantileAt 4
            , sqrt . variance' <$> values
            , iqr
            , skewness' <$> values
            ]
-- | Round a 'Double' to @n@ decimal places.
--
-- The value is multiplied by @10 ^^ n@, rounded to the nearest integer with
-- 'round' (ties go to the even integer), and divided by the same factor.
-- Because '^^' accepts negative exponents, a negative @n@ rounds to the left
-- of the decimal point (e.g. @-1@ rounds to the nearest ten).
--
-- NaN and infinite inputs are returned unchanged: converting them to an
-- 'Integer' with 'round' has no meaningful result, so they must not be
-- pushed through the scaling step.
roundTo :: Int -> Double -> Double
roundTo n x
    | isNaN x || isInfinite x = x
    | otherwise = fromInteger (round (x * scale)) / scale
  where
    scale :: Double
    scale = 10 ^^ n
-- | Format a fraction as a percentage string with two decimal places.
--
-- The input is a fraction (@0.5@ means fifty percent). Anything below
-- @0.00005@ is reported as the fixed string @"<0.01%"@ rather than as a
-- number; every other value is multiplied by 100 and printed with @%.2f@
-- followed by a percent sign.
toPct2dp :: Double -> String
toPct2dp fraction =
    if fraction < 0.00005
        then "<0.01%"
        else printf "%.2f%%" (100 * fraction)