{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Statistics where
import Data.Bifunctor (second)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Intro as VA
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector.Unboxed.Mutable as VUM
import qualified Statistics.Quantile as SS
import qualified Statistics.Sample as SS
import Prelude as P
import Control.Exception (throw)
import Control.Monad.ST (runST)
import qualified Data.Bifunctor as Data
import Data.Foldable (asum)
import Data.Function ((&))
import Data.Maybe (fromMaybe, isJust)
import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl))
import DataFrame.Errors (DataFrameException (..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame (..), empty, getColumn, unsafeGetColumn)
import DataFrame.Internal.Row (showValue, toAny)
import DataFrame.Operations.Core
import DataFrame.Operations.Subset (filterJust)
import GHC.Float (int2Double)
import Text.Printf (printf)
import Type.Reflection (typeRep)
{- | Build a frequency table for the column called @name@.

The result starts from a @"Statistic"@ column holding the row labels
@"Count"@ and @"Percentage (%)"@ and gains one column per entry returned by
'valueCounts'. Each of those columns is named with 'showValue' and holds the
entry's count followed by its share of the total, formatted by 'toPct2dp'.

Throws 'ColumnNotFoundException' when 'getColumn' finds no such column.
-}
frequencies :: T.Text -> DataFrame -> DataFrame
frequencies name df = case getColumn name df of
    Nothing ->
        throw $
            ColumnNotFoundException name "frequencies" (map fst $ M.toList $ columnIndices df)
    Just (BoxedColumn (column :: V.Vector a)) -> freqs column
    Just (OptionalColumn (column :: V.Vector a)) -> freqs column
    Just (UnboxedColumn (column :: VU.Vector a)) -> freqs column
  where
    -- Frame that only carries the row labels; value columns are appended to it.
    initDf :: DataFrame
    initDf =
        empty
            & insertVector "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])

    -- The vector argument is only used to pin the element type @a@; the
    -- counts themselves are re-read from the frame through 'valueCounts'.
    freqs :: forall v a. (VG.Vector v a, Columnable a) => v a -> DataFrame
    freqs _ = L.foldl' addValue initDf counts
      where
        -- Computed once per call instead of once per distinct value.
        counts :: [(a, Int)]
        counts = valueCounts name df

        total :: Double
        total = fromIntegral (P.sum (map snd counts))

        addValue :: DataFrame -> (a, Int) -> DataFrame
        addValue acc (value, k) =
            insertVector
                (showValue @a value)
                (V.fromList [toAny k, toAny (toPct2dp (fromIntegral k / total))])
                acc
{- | Arithmetic mean of the column called @name@, computed by running 'mean''
through 'applyStatistic'.
-}
mean :: T.Text -> DataFrame -> Maybe Double
mean name df = applyStatistic mean' name df
{- | Median of the column called @name@, computed by running 'median''
through 'applyStatistic'.
-}
median :: T.Text -> DataFrame -> Maybe Double
median name df = applyStatistic median' name df
{- | Standard deviation of the column called @name@: the square root of
'variance'', evaluated through 'applyStatistic'.
-}
standardDeviation :: T.Text -> DataFrame -> Maybe Double
standardDeviation name df = applyStatistic (\samp -> sqrt (variance' samp)) name df
{- | Skewness of the column called @name@, delegating to @skewness@ from
"Statistics.Sample" through 'applyStatistic'.
-}
skewness :: T.Text -> DataFrame -> Maybe Double
skewness name df = applyStatistic SS.skewness name df
{- | Variance of the column called @name@, computed by running 'variance''
through 'applyStatistic'.
-}
variance :: T.Text -> DataFrame -> Maybe Double
variance name df = applyStatistic variance' name df
{- | Inter-quartile range of the column called @name@: @midspread@ from
"Statistics.Quantile" with the @medianUnbiased@ parameters and 4 quantiles,
evaluated through 'applyStatistic'.
-}
interQuartileRange :: T.Text -> DataFrame -> Maybe Double
interQuartileRange name df = applyStatistic quartileSpread name df
  where
    quartileSpread :: VU.Vector Double -> Double
    quartileSpread = SS.midspread SS.medianUnbiased 4
{- | Correlation between two columns of the same frame.

Both columns are read with '_getColumnAsDouble'; if either cannot be read as
doubles the result is 'Nothing'. Otherwise the result is whatever
'correlation'' returns for the two vectors.
-}
correlation :: T.Text -> T.Text -> DataFrame -> Maybe Double
correlation xName yName df =
    _getColumnAsDouble xName df >>= \xs ->
        _getColumnAsDouble yName df >>= \ys ->
            correlation' xs ys
{- | Read the column called @name@ as an unboxed vector of 'Double'.

Only an 'UnboxedColumn' is accepted. 'Double' elements are returned as they
are, 'Int' elements are converted with 'fromIntegral' and 'Float' elements
with 'realToFrac'. A missing column, any other column constructor, or any
other element type yields 'Nothing'.
-}
_getColumnAsDouble :: T.Text -> DataFrame -> Maybe (VU.Vector Double)
_getColumnAsDouble name df = case getColumn name df of
    Just (UnboxedColumn (f :: VU.Vector a))
        -- Each guard proves the element type via 'Refl' before using it.
        | Just Refl <- testEquality (typeRep @a) (typeRep @Double) -> Just f
        | Just Refl <- testEquality (typeRep @a) (typeRep @Int) -> Just (VU.map fromIntegral f)
        | Just Refl <- testEquality (typeRep @a) (typeRep @Float) -> Just (VU.map realToFrac f)
    -- Reached for a missing column, another constructor, or when no guard matched.
    _ -> Nothing
{-# INLINE _getColumnAsDouble #-}
{- | Sum the elements of the column called @name@ at the requested type @a@.

Returns 'Nothing' when the column is not an 'UnboxedColumn' or when its
element type is not @a@. Throws 'ColumnNotFoundException' when 'getColumn'
finds no such column.
-}
sum :: forall a. (Columnable a, Num a, VU.Unbox a) => T.Text -> DataFrame -> Maybe a
sum name df = case getColumn name df of
    Nothing ->
        throw $
            ColumnNotFoundException name "sum" (map fst $ M.toList $ columnIndices df)
    Just (UnboxedColumn (column :: VU.Vector a'))
        -- 'Refl' proves the stored element type is the requested one.
        | Just Refl <- testEquality (typeRep @a') (typeRep @a) -> Just (VG.sum column)
    -- Element type mismatch, or a column that is not unboxed: previously the
    -- latter fell off the end of the case and crashed at runtime.
    Just _ -> Nothing
{- | Apply a scalar statistic @f@ to the column called @name@.

The column is looked up in the frame returned by @filterJust name df@ and
converted with '_getColumnAsDouble', so 'Double', 'Int' and 'Float' unboxed
columns are all evaluated on that same filtered frame.

Returns 'Nothing' when the column cannot be read as doubles or when @f@
produces NaN. Throws 'ColumnNotFoundException' when the column is absent from
the filtered frame.
-}
applyStatistic :: (VU.Vector Double -> Double) -> T.Text -> DataFrame -> Maybe Double
applyStatistic f name df = case getColumn name filtered of
    Nothing ->
        throw $
            ColumnNotFoundException name "applyStatistic" (map fst $ M.toList $ columnIndices df)
    Just _ -> do
        -- Convert from the filtered frame, not from the caller's frame, so
        -- every element type sees the same rows.
        values <- _getColumnAsDouble name filtered
        let res = f values
        if isNaN res then Nothing else pure res
  where
    filtered :: DataFrame
    filtered = filterJust name df
{-# INLINE applyStatistic #-}
{- | Apply a vector-valued statistic @f@ to the column called @name@.

The column is looked up in the frame returned by @filterJust name df@. An
'UnboxedColumn' of 'Int', 'Double' or 'Float' is converted to doubles and
passed to @f@, whose result is forced to weak head normal form before being
wrapped in 'Just'. Anything else, including a missing column, yields
'Nothing'.
-}
applyStatistics ::
    (VU.Vector Double -> VU.Vector Double) -> T.Text -> DataFrame -> Maybe (VU.Vector Double)
applyStatistics f name df = case getColumn name (filterJust name df) of
    Just (UnboxedColumn (column :: VU.Vector a'))
        | Just Refl <- testEquality (typeRep @a') (typeRep @Int) ->
            Just $! f (VU.map fromIntegral column)
        | Just Refl <- testEquality (typeRep @a') (typeRep @Double) ->
            Just $! f column
        | Just Refl <- testEquality (typeRep @a') (typeRep @Float) ->
            Just $! f (VG.map realToFrac column)
    -- Missing column, another constructor, or an unsupported element type.
    _ -> Nothing
{- | Descriptive statistics for every column of the frame.

The result starts from a @"Statistic"@ column of row labels. For each column
name, all ten statistics are computed; the column is added (rounded to two
decimals with 'roundTo') only if every one of them is a 'Just'. Columns for
which any statistic is 'Nothing' are left out.
-}
summarize :: DataFrame -> DataFrame
summarize df = fold columnStats (columnNames df) labels
  where
    labels :: DataFrame
    labels =
        fromNamedColumns
            [
                ( "Statistic"
                , fromList
                    [ "Count" :: T.Text
                    , "Mean"
                    , "Minimum"
                    , "25%"
                    , "Median"
                    , "75%"
                    , "Max"
                    , "StdDev"
                    , "IQR"
                    , "Skewness"
                    ]
                )
            ]

    -- 'sequenceA' succeeds exactly when every statistic is a 'Just', so the
    -- statistics are evaluated once rather than once to test and once to use.
    columnStats :: T.Text -> DataFrame -> DataFrame
    columnStats name d = case sequenceA (stats name) of
        Just values -> insertUnboxedVector name (VU.fromList (map (roundTo 2) values)) d
        Nothing -> d

    -- Statistics in the same order as the row labels above.
    stats :: T.Text -> [Maybe Double]
    stats name =
        [ count
        , mean name df
        , quantileAt 0
        , lowerQuartile
        , quantileAt 2
        , upperQuartile
        , quantileAt 4
        , standardDeviation name df
        , (-) <$> upperQuartile <*> lowerQuartile
        , skewness name df
        ]
      where
        count :: Maybe Double
        count = fromIntegral . numElements <$> getColumn name df

        -- Quantiles 0..4 out of 4: minimum, 25%, median, 75%, maximum.
        quantiles :: Maybe (VU.Vector Double)
        quantiles =
            applyStatistics
                (SS.quantilesVec SS.medianUnbiased (VU.fromList [0, 1, 2, 3, 4]) 4)
                name
                df

        quantileAt :: Int -> Maybe Double
        quantileAt i = (VG.! i) <$> quantiles

        lowerQuartile, upperQuartile :: Maybe Double
        lowerQuartile = quantileAt 1
        upperQuartile = quantileAt 3
{- | Round @x@ to @n@ decimal places.

The value is scaled by @10 ^^ n@, rounded with 'round' (which rounds halves
to the nearest even integer) and scaled back. Because '^^' accepts negative
exponents, a negative @n@ rounds to tens, hundreds and so on instead of
throwing.
-}
roundTo :: Int -> Double -> Double
roundTo n x = fromInteger (round (x * scale)) / scale
  where
    -- One scale factor for both directions; '^^' equals '^' for n >= 0.
    scale :: Double
    scale = 10 ^^ n
{- | Format a fraction as a percentage with two decimals.

Fractions below @0.00005@ are shown as @"<0.01%"@; everything else is
multiplied by 100 and printed with @"%.2f%%"@.
-}
toPct2dp :: Double -> String
toPct2dp x =
    if x < 0.00005
        then "<0.01%"
        else printf "%.2f%%" (x * 100)
{- | Arithmetic mean of a sample: its sum divided by its length.

An empty sample evaluates @0 / 0@ and therefore yields NaN.
-}
mean' :: VU.Vector Double -> Double
mean' samp = VU.sum samp / fromIntegral (VU.length samp)
{-# INLINE mean' #-}
{- | Median of a sample.

The sample is sorted in a private copy. For an odd length the middle element
is returned; for an even length, the average of the two middle elements.
Throws 'EmptyDataSetException' for an empty sample.
-}
median' :: VU.Vector Double -> Double
median' samp
    | VU.null samp = throw (EmptyDataSetException "median")
    | odd len = upper
    | otherwise = (upper + sorted VU.! (mid - 1)) / 2
  where
    -- 'VU.modify' sorts a copy, leaving @samp@ untouched.
    sorted = VU.modify VA.sort samp
    len = VU.length samp
    mid = len `div` 2
    upper = sorted VU.! mid
{-# INLINE median' #-}
{- | Strict running accumulator used by 'step' and 'computeVariance'.

Fields, in order: the number of samples seen, the running mean, and the
running sum of squared deviations from that mean.
-}
data VarAcc
    = VarAcc
        !Int
        -- ^ samples seen so far
        !Double
        -- ^ running mean
        !Double
        -- ^ running sum of squared deviations
    deriving (Show)
{- | Fold one observation into a 'VarAcc'.

Increments the count, moves the mean by the observation's deviation divided
by the new count, and adds the product of the deviations from the old and
the new mean to the squared-deviation sum.
-}
step :: VarAcc -> Double -> VarAcc
step (VarAcc !count !mu !sumSq) !x = VarAcc count' mu' sumSq'
  where
    !count' = count + 1
    !delta = x - mu
    !mu' = mu + delta / fromIntegral count'
    -- Uses the updated mean on purpose: delta * (x - mu').
    !sumSq' = sumSq + delta * (x - mu')
{-# INLINE step #-}
{- | Turn a finished 'VarAcc' into a variance.

Divides the squared-deviation sum by @n - 1@. With fewer than two samples
the result is 0.
-}
computeVariance :: VarAcc -> Double
computeVariance (VarAcc !count _ !sumSq) =
    if count < 2
        then 0
        else sumSq / fromIntegral (count - 1)
{-# INLINE computeVariance #-}
{- | Variance of a sample: a strict left fold of 'step' over the sample,
starting from an all-zero 'VarAcc', finished with 'computeVariance'.
-}
variance' :: VU.Vector Double -> Double
variance' samp = computeVariance (VU.foldl' step (VarAcc 0 0 0) samp)
{-# INLINE variance' #-}
{- | Correlation coefficient of two samples, from a single pass that
accumulates the sums of x, y, x², y² and xy.

Returns 'Nothing' when the samples differ in length, when there are fewer
than two observations, or when the coefficient is undefined (the quotient is
NaN or infinite, e.g. because one sample is constant and the denominator is
zero).
-}
correlation' :: VU.Vector Double -> VU.Vector Double -> Maybe Double
correlation' xs ys
    | VU.length xs /= VU.length ys = Nothing
    | nI < 2 = Nothing
    | otherwise =
        let !nf = fromIntegral nI :: Double
            (!sumX, !sumY, !sumSquaredX, !sumSquaredY, !sumXY) = go 0 0 0 0 0 0
            !num = nf * sumXY - sumX * sumY
            !den = sqrt ((nf * sumSquaredX - sumX * sumX) * (nf * sumSquaredY - sumY * sumY))
            !r = num / den
         in -- An undefined coefficient is reported as Nothing, not Just NaN.
            if isNaN r || isInfinite r then Nothing else pure r
  where
    !nI = VU.length xs

    -- Only evaluated after the length check above, which is what makes the
    -- unchecked indexing into @ys@ safe.
    go ::
        Int ->
        Double ->
        Double ->
        Double ->
        Double ->
        Double ->
        (Double, Double, Double, Double, Double)
    go !i !sx !sy !sxx !syy !sxy
        | i < nI =
            let !x = VU.unsafeIndex xs i
                !y = VU.unsafeIndex ys i
             in go (i + 1) (sx + x) (sy + y) (sxx + x * x) (syy + y * y) (sxy + x * y)
        | otherwise = (sx, sy, sxx, syy, sxy)
{-# INLINE correlation' #-}