{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE BangPatterns #-}
module DataFrame.Operations.Statistics where
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import qualified Statistics.Quantile as SS
import qualified Statistics.Sample as SS
import Prelude as P
import Control.Exception (throw)
import DataFrame.Errors (DataFrameException(..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame(..), getColumn, empty)
import DataFrame.Operations.Core
import Data.Foldable (asum)
import Data.Maybe (isJust, fromMaybe)
import Data.Function ((&))
import Data.Type.Equality (type (:~:)(Refl), TestEquality (testEquality))
import Type.Reflection (typeRep)
-- | Build a frequency table for the named column.
--
-- The result starts with a @"Statistic"@ column holding the two row labels
-- @"Count"@ and @"Percentage (%)"@. For every @(value, count)@ pair reported
-- by 'valueCounts' a further column is inserted, named after the value and
-- holding the count and the count's share of the total. The share is
-- computed with integer division, so it is truncated towards zero.
--
-- Throws 'ColumnNotFoundException' when the column does not exist, and calls
-- 'error' for any column constructor other than 'BoxedColumn',
-- 'OptionalColumn' or 'UnboxedColumn'.
frequencies :: T.Text -> DataFrame -> DataFrame
frequencies name df = case getColumn name df of
    Nothing ->
        throw $
            ColumnNotFoundException
                name
                "frequencies"
                (map fst $ M.toList $ columnIndices df)
    -- The pattern signatures only exist to bring the element type into scope
    -- for the type application on 'valueCounts'.
    Just (BoxedColumn (_ :: V.Vector a)) -> frequencyTable (valueCounts @a name df)
    Just (OptionalColumn (_ :: V.Vector a)) -> frequencyTable (valueCounts @a name df)
    Just (UnboxedColumn (_ :: VU.Vector a)) -> frequencyTable (valueCounts @a name df)
    _ -> error $ "There are ungrouped columns"
  where
    -- Turn the (value, count) pairs into the two-row result frame.
    frequencyTable :: forall v. (Columnable v) => [(v, Int)] -> DataFrame
    frequencyTable counts = L.foldl' addValue initDf counts
      where
        total = P.sum $ map snd counts
        initDf =
            empty
                & insertVector "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])
        -- 'total' is only demanded when 'counts' is non-empty, so the
        -- division cannot see a zero total produced by an empty list.
        addValue acc (value, k) =
            insertVector (vText value) (V.fromList [k, k * 100 `div` total]) acc

    -- Render a value as a column name: 'T.Text' is used as is, 'String' is
    -- packed, anything else goes through 'show'.
    vText :: forall v. (Columnable v) => v -> T.Text
    vText c' = case testEquality (typeRep @v) (typeRep @T.Text) of
        Just Refl -> c'
        Nothing -> case testEquality (typeRep @v) (typeRep @String) of
            Just Refl -> T.pack c'
            Nothing -> (T.pack . show) c'
-- | Arithmetic mean of the named column, computed by running 'mean'' through
-- 'applyStatistic'.
mean :: T.Text -> DataFrame -> Maybe Double
mean = applyStatistic mean'
-- | Median of the named column: 'SS.median' with the 'SS.medianUnbiased'
-- parameters, run through 'applyStatistic'.
median :: T.Text -> DataFrame -> Maybe Double
median = applyStatistic (SS.median SS.medianUnbiased)
-- | Standard deviation of the named column: 'SS.fastStdDev' run through
-- 'applyStatistic'.
standardDeviation :: T.Text -> DataFrame -> Maybe Double
standardDeviation = applyStatistic SS.fastStdDev
-- | Skewness of the named column: 'SS.skewness' run through 'applyStatistic'.
skewness :: T.Text -> DataFrame -> Maybe Double
skewness = applyStatistic SS.skewness
-- | Variance of the named column, computed by running 'variance'' through
-- 'applyStatistic'.
variance :: T.Text -> DataFrame -> Maybe Double
variance = applyStatistic variance'
-- | Inter-quartile range of the named column: 'SS.midspread' with the
-- 'SS.medianUnbiased' parameters and 4 quantile groups, run through
-- 'applyStatistic'.
interQuartileRange :: T.Text -> DataFrame -> Maybe Double
interQuartileRange = applyStatistic (SS.midspread SS.medianUnbiased 4)
-- | Correlation between two named columns, as computed by 'SS.correlation'
-- on the element-wise pairing of the two columns.
--
-- Returns 'Nothing' when either column cannot be obtained as a vector of
-- 'Double' by '_getColumnAsDouble'.
correlation :: T.Text -> T.Text -> DataFrame -> Maybe Double
correlation first second df = do
    f <- _getColumnAsDouble first df
    s <- _getColumnAsDouble second df
    return $ SS.correlation (VG.zip f s)
-- | Fetch the named column as an unboxed vector of 'Double'.
--
-- An unboxed 'Double' column is returned unchanged and an unboxed 'Int'
-- column is converted with 'fromIntegral'. Every other case - a missing
-- column, a different element type, or a column that is not an
-- 'UnboxedColumn' - yields 'Nothing'.
_getColumnAsDouble :: T.Text -> DataFrame -> Maybe (VU.Vector Double)
_getColumnAsDouble name df = case getColumn name df of
    Just (UnboxedColumn (f :: VU.Vector a)) ->
        case testEquality (typeRep @a) (typeRep @Double) of
            Just Refl -> Just f
            Nothing -> case testEquality (typeRep @a) (typeRep @Int) of
                Just Refl -> Just $ VU.map fromIntegral f
                Nothing -> Nothing
    _ -> Nothing
-- | Sum the elements of the named column at the element type requested by
-- the caller.
--
-- Returns 'Just' the sum when the column is an 'UnboxedColumn' whose element
-- type is exactly @a@, and 'Nothing' when the element type differs or the
-- column is stored with any other constructor.
--
-- Throws 'ColumnNotFoundException' when the column does not exist.
sum :: forall a. (Columnable a, Num a, VU.Unbox a) => T.Text -> DataFrame -> Maybe a
sum name df = case getColumn name df of
    Nothing ->
        throw $
            ColumnNotFoundException
                name
                "sum"
                (map fst $ M.toList $ columnIndices df)
    Just (UnboxedColumn (column :: VU.Vector a')) ->
        case testEquality (typeRep @a') (typeRep @a) of
            Just Refl -> Just $ VG.sum column
            Nothing -> Nothing
    -- Columns that are not unboxed used to fall through the case and crash
    -- with a pattern-match failure; report "no result" instead.
    Just _ -> Nothing
-- | Apply a scalar statistic to the named column.
--
-- For an unboxed 'Double' column the statistic is applied directly through
-- 'reduceColumn'. For any other unboxed column the first successful
-- candidate of the following list is reduced instead: the column mapped from
-- 'Int', from 'Integer', from 'Float' (each via 'mapColumn'), and finally
-- the column itself.
-- NOTE(review): this relies on 'mapColumn' and 'reduceColumn' returning
-- 'Nothing' on an element-type mismatch - confirm against their definitions.
--
-- Columns that are not 'UnboxedColumn' yield 'Nothing'.
-- Throws 'ColumnNotFoundException' when the column does not exist.
applyStatistic :: (VU.Vector Double -> Double) -> T.Text -> DataFrame -> Maybe Double
applyStatistic f name df = case getColumn name df of
    Nothing ->
        throw $
            ColumnNotFoundException
                name
                "applyStatistic"
                (map fst $ M.toList $ columnIndices df)
    Just column@(UnboxedColumn (_ :: VU.Vector a)) ->
        case testEquality (typeRep @a) (typeRep @Double) of
            Just Refl -> reduceColumn f column
            Nothing -> do
                matching <-
                    asum
                        [ mapColumn (fromIntegral :: Int -> Double) column
                        , mapColumn (fromIntegral :: Integer -> Double) column
                        , mapColumn (realToFrac :: Float -> Double) column
                        , Just column
                        ]
                reduceColumn f matching
    _ -> Nothing
-- | Apply a vector-valued statistic to the named column.
--
-- Only unboxed columns of 'Int', 'Double' or 'Float' are supported; 'Int'
-- and 'Float' elements are converted to 'Double' first. The result of the
-- statistic is forced to weak head normal form before being wrapped in
-- 'Just'. A missing column, any other element type, or any other column
-- constructor yields 'Nothing'.
applyStatistics :: (VU.Vector Double -> VU.Vector Double) -> T.Text -> DataFrame -> Maybe (VU.Vector Double)
applyStatistics f name df = case getColumn name df of
    Just (UnboxedColumn (column :: VU.Vector a')) ->
        case testEquality (typeRep @a') (typeRep @Int) of
            Just Refl -> Just $! f (VU.map fromIntegral column)
            Nothing -> case testEquality (typeRep @a') (typeRep @Double) of
                Just Refl -> Just $! f column
                Nothing -> case testEquality (typeRep @a') (typeRep @Float) of
                    Just Refl -> Just $! f (VG.map realToFrac column)
                    Nothing -> Nothing
    _ -> Nothing
-- | Produce a table of summary statistics, one column per summarizable
-- column of the input.
--
-- The result starts with a @"Statistic"@ column holding the row labels
-- (mean, minimum, 25%, median, 75%, max, standard deviation, IQR, skewness).
-- For each column name of the input, the nine statistics are computed; when
-- every one of them is available the values are rounded to two decimal
-- places and inserted under the same column name, otherwise the column is
-- left out of the summary.
summarize :: DataFrame -> DataFrame
summarize df = fold columnStats (columnNames df) labels
  where
    labels :: DataFrame
    labels =
        fromNamedColumns
            [
                ( "Statistic"
                , fromList
                    [ "Mean" :: T.Text
                    , "Minimum"
                    , "25%"
                    , "Median"
                    , "75%"
                    , "Max"
                    , "StdDev"
                    , "IQR"
                    , "Skewness"
                    ]
                )
            ]

    -- 'sequence' succeeds exactly when every statistic is a 'Just', so the
    -- statistics are computed once rather than once for the check and once
    -- more for the values.
    columnStats :: T.Text -> DataFrame -> DataFrame
    columnStats name d = case sequence (stats name) of
        Just values -> insertUnboxedVector name (VU.fromList (map (roundTo 2) values)) d
        Nothing -> d

    -- The statistics for one column, in the same order as the row labels.
    stats :: T.Text -> [Maybe Double]
    stats name =
        let quantiles :: Maybe (VU.Vector Double)
            quantiles =
                applyStatistics
                    (SS.quantilesVec SS.medianUnbiased (VU.fromList [0, 1, 2, 3, 4]) 4)
                    name
                    df
            -- Entry i of the quantile vector, when quantiles are available.
            quantileAt :: Int -> Maybe Double
            quantileAt i = (VG.! i) <$> quantiles
            min' = quantileAt 0
            quartile1 = quantileAt 1
            median' = quantileAt 2
            quartile3 = quantileAt 3
            max' = quantileAt 4
            iqr = (-) <$> quartile3 <*> quartile1
         in [ mean name df
            , min'
            , quartile1
            , median'
            , quartile3
            , max'
            , standardDeviation name df
            , iqr
            , skewness name df
            ]
-- | Round a 'Double' to @n@ decimal places.
--
-- The value is scaled by @10 ^^ n@, rounded to the nearest integer with
-- 'round' (which rounds halves to the even neighbour), and scaled back.
-- A negative @n@ rounds to tens, hundreds, and so on.
roundTo :: Int -> Double -> Double
roundTo n x = fromInteger (round $ x * scale) / scale
  where
    -- '^^' agrees with '^' for non-negative exponents and also accepts
    -- negative ones, where '^' would throw.
    scale :: Double
    scale = 10 ^^ n
-- | Arithmetic mean of a vector of doubles.
--
-- A single strict left fold accumulates the running sum and the element
-- count; the result is their quotient. For an empty vector this is @0 / 0@.
mean' :: VU.Vector Double -> Double
mean' samp = total / fromIntegral n
  where
    (!total, !n) =
        VG.foldl'
            (\(!acc, !count) v -> (acc + v, count + 1))
            (0 :: Double, 0 :: Int)
            samp
-- | Accumulator for the single-pass variance computation performed by
-- 'step' and 'computeVariance'. The strict fields are, in order: the number
-- of elements seen so far, the running mean, and the running sum of squared
-- deviations from the mean.
data VarAcc = VarAcc !Int !Double !Double deriving Show
-- | Fold one observation into a 'VarAcc' (one update of Welford's online
-- algorithm): bump the count, move the running mean towards the new value,
-- and grow the sum of squared deviations using the deviation from both the
-- old and the new mean.
step :: VarAcc -> Double -> VarAcc
step (VarAcc !n !mu !m2) !x =
    let !n' = n + 1
        !delta = x - mu
        !mu' = mu + delta / fromIntegral n'
        !m2' = m2 + delta * (x - mu')
     in VarAcc n' mu' m2'
-- | Extract the variance from a finished 'VarAcc': the accumulated sum of
-- squared deviations divided by @n - 1@. With fewer than two observations
-- the result is defined as 0.
computeVariance :: VarAcc -> Double
computeVariance (VarAcc n _ m2)
    | n < 2 = 0
    | otherwise = m2 / fromIntegral (n - 1)
-- | Variance of a vector of doubles, computed in one strict pass by folding
-- 'step' over the vector from an all-zero 'VarAcc' and finishing with
-- 'computeVariance'.
variance' :: VU.Vector Double -> Double
variance' = computeVariance . VG.foldl' step (VarAcc 0 0 0)