{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Statistics where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU

import Prelude as P

import Control.Exception (throw)
import Data.Function ((&))
import Data.Maybe (fromMaybe, isJust)
import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl))
import DataFrame.Errors (DataFrameException (..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame (..), empty, getColumn)
import DataFrame.Internal.Row (showValue, toAny)
import DataFrame.Internal.Statistics
import DataFrame.Internal.Types
import DataFrame.Operations.Core
import DataFrame.Operations.Subset (filterJust)
import Text.Printf (printf)
import Type.Reflection (typeRep)

{- | Show a frequency table for a categorical feature.

The result has a @Statistic@ column holding the row labels @Count@ and
@Percentage (%)@, followed by one column per distinct value of the requested
column. Throws 'ColumnNotFoundException' when the column does not exist.

__Examples:__

@
ghci> df <- D.readCsv ".\/data\/housing.csv"

ghci> D.frequencies "ocean_proximity" df

----------------------------------------------------------------------------
index |   Statistic    | <1H OCEAN | INLAND | ISLAND | NEAR BAY | NEAR OCEAN
------|----------------|-----------|--------|--------|----------|-----------
 Int  |      Text      |    Any    |  Any   |  Any   |   Any    |    Any
------|----------------|-----------|--------|--------|----------|-----------
0     | Count          | 9136      | 6551   | 5      | 2290     | 2658
1     | Percentage (%) | 44.26%    | 31.74% | 0.02%  | 11.09%   | 12.88%
@
-}
frequencies :: T.Text -> DataFrame -> DataFrame
frequencies name df =
    case getColumn name df of
        Nothing ->
            throw $
                ColumnNotFoundException name "frequencies" (M.keys $ columnIndices df)
        Just (BoxedColumn (column :: V.Vector a)) -> freqs column
        Just (OptionalColumn (column :: V.Vector a)) -> freqs column
        Just (UnboxedColumn (column :: VU.Vector a)) -> freqs column
  where
    -- Frame holding only the row labels; value columns are appended to it.
    initDf :: DataFrame
    initDf =
        empty
            & insertVector "Statistic" (V.fromList ["Count" :: T.Text, "Percentage (%)"])

    -- The vector argument is only used to fix the element type @a@; the
    -- actual tallies come from 'valueCounts' on the named column.
    freqs :: forall v a. (VG.Vector v a, Columnable a) => v a -> DataFrame
    freqs _ = L.foldl' addValue initDf counts
      where
        -- Computed once per call instead of once per distinct value.
        counts :: [(a, Int)]
        counts = valueCounts name df

        -- Total number of observations, shared by every percentage.
        total :: Double
        total = fromIntegral (P.sum (map snd counts))

        percentage k = toAny (toPct2dp (fromIntegral k / total))

        addValue :: DataFrame -> (a, Int) -> DataFrame
        addValue acc (value, k) =
            insertVector
                (showValue @a value)
                (V.fromList [toAny k, percentage k])
                acc

{- | Arithmetic mean of the named column, computed by running @mean'@ through
'applyStatistic'. 'Nothing' is returned whenever 'applyStatistic' yields no
value (for example when the statistic evaluates to NaN).
-}
mean :: T.Text -> DataFrame -> Maybe Double
mean name df = applyStatistic mean' name df

{- | Median of the named column, computed by running @median'@ through
'applyStatistic'. 'Nothing' is returned whenever 'applyStatistic' yields no
value (for example when the statistic evaluates to NaN).
-}
median :: T.Text -> DataFrame -> Maybe Double
median name df = applyStatistic median' name df

{- | Standard deviation of the named column: the square root of @variance'@,
evaluated through 'applyStatistic'. 'Nothing' is returned whenever
'applyStatistic' yields no value (for example when the result is NaN).
-}
standardDeviation :: T.Text -> DataFrame -> Maybe Double
standardDeviation name df = applyStatistic stdDev name df
  where
    stdDev values = sqrt (variance' values)

{- | Skewness of the named column, computed by running @skewness'@ through
'applyStatistic'. 'Nothing' is returned whenever 'applyStatistic' yields no
value (for example when the statistic evaluates to NaN).
-}
skewness :: T.Text -> DataFrame -> Maybe Double
skewness name df = applyStatistic skewness' name df

{- | Variance of the named column, computed by running @variance'@ through
'applyStatistic'. 'Nothing' is returned whenever 'applyStatistic' yields no
value (for example when the statistic evaluates to NaN).
-}
variance :: T.Text -> DataFrame -> Maybe Double
variance name df = applyStatistic variance' name df

{- | Inter-quartile range of the named column, computed by running
@interQuartileRange'@ through 'applyStatistic'. 'Nothing' is returned whenever
'applyStatistic' yields no value (for example when the result is NaN).
-}
interQuartileRange :: T.Text -> DataFrame -> Maybe Double
interQuartileRange name df = applyStatistic interQuartileRange' name df

{- | Pearson's correlation coefficient between two columns as a standalone
value.

Both columns are read with '_getColumnAsDouble' (first column first); if
either read yields 'Nothing' the result is 'Nothing', otherwise the two
vectors are handed to @correlation'@.
-}
correlation :: T.Text -> T.Text -> DataFrame -> Maybe Double
correlation first second df =
    _getColumnAsDouble first df >>= \xs ->
        _getColumnAsDouble second df >>= \ys ->
            correlation' xs ys

{- | Read the named column as an unboxed vector of 'Double'.

* A missing column throws 'ColumnNotFoundException' (reported under the
  caller name @"applyStatistic"@).
* An unboxed column of 'Double' is returned as is.
* An unboxed column whose element type is flagged integral is converted with
  'fromIntegral'; one flagged floating is converted with 'realToFrac'.
* Every other column (including boxed and optional ones) gives 'Nothing'.
-}
_getColumnAsDouble :: T.Text -> DataFrame -> Maybe (VU.Vector Double)
_getColumnAsDouble name df = case getColumn name df of
    Nothing ->
        throw $
            ColumnNotFoundException name "applyStatistic" (M.keys $ columnIndices df)
    Just (UnboxedColumn (xs :: VU.Vector a))
        -- Already doubles: no conversion needed.
        | Just Refl <- testEquality (typeRep @a) (typeRep @Double) -> Just xs
        | otherwise -> case sIntegral @a of
            STrue -> Just (VU.map fromIntegral xs)
            SFalse -> case sFloating @a of
                STrue -> Just (VU.map realToFrac xs)
                SFalse -> Nothing
    Just _ -> Nothing
{-# INLINE _getColumnAsDouble #-}

{- | Sum of the named column as a standalone value.

The caller picks the result type @a@. 'Nothing' is returned when the column's
element type differs from @a@. For optional columns missing entries count as
@0@. Throws 'ColumnNotFoundException' when the column does not exist.
-}
sum ::
    forall a. (Columnable a, Num a, VU.Unbox a) => T.Text -> DataFrame -> Maybe a
sum name df = case getColumn name df of
    Nothing ->
        throw $ ColumnNotFoundException name "sum" (M.keys $ columnIndices df)
    Just (UnboxedColumn (column :: VU.Vector a'))
        | Just Refl <- testEquality (typeRep @a') (typeRep @a) ->
            Just (VG.sum column)
        | otherwise -> Nothing
    Just (BoxedColumn (column :: V.Vector a'))
        | Just Refl <- testEquality (typeRep @a') (typeRep @a) ->
            Just (VG.sum column)
        | otherwise -> Nothing
    Just (OptionalColumn (column :: V.Vector (Maybe a')))
        | Just Refl <- testEquality (typeRep @a') (typeRep @a) ->
            Just (VG.sum (VG.map (fromMaybe 0) column))
        | otherwise -> Nothing

{- | Apply a scalar statistic to the named column.

The frame is first passed through 'filterJust' for that column, the column is
then read with '_getColumnAsDouble', and the statistic is evaluated on the
resulting vector. A NaN result is reported as 'Nothing'.
-}
applyStatistic ::
    (VU.Vector Double -> Double) -> T.Text -> DataFrame -> Maybe Double
applyStatistic f name df = do
    values <- _getColumnAsDouble name (filterJust name df)
    let result = f values
    if isNaN result then Nothing else Just result
{-# INLINE applyStatistic #-}

{- | Apply a vector-valued statistic to the named column.

The frame is first passed through 'filterJust' for that column and the column
is then read with '_getColumnAsDouble'; the statistic is mapped over the
result, so 'Nothing' from the read is propagated unchanged. Unlike
'applyStatistic', no NaN check is performed on the output.
-}
applyStatistics ::
    (VU.Vector Double -> VU.Vector Double) ->
    T.Text ->
    DataFrame ->
    Maybe (VU.Vector Double)
applyStatistics f name df = f <$> _getColumnAsDouble name (filterJust name df)

{- | Descriptive statistics of the numeric columns.

The result starts with a @Statistic@ column of row labels. For every column
of the input for which all ten statistics are available, a column of the same
name is added holding the values rounded to two decimal places. Columns for
which any statistic is unavailable are left out.
-}
summarize :: DataFrame -> DataFrame
summarize df = fold columnStats (columnNames df) labels
  where
    -- Frame holding only the row labels, in the same order as 'stats'.
    labels :: DataFrame
    labels =
        fromNamedColumns
            [
                ( "Statistic"
                , fromList
                    [ "Count" :: T.Text
                    , "Mean"
                    , "Minimum"
                    , "25%"
                    , "Median"
                    , "75%"
                    , "Max"
                    , "StdDev"
                    , "IQR"
                    , "Skewness"
                    ]
                )
            ]

    -- Append the statistics of one column, or skip it if any is missing.
    -- 'sequenceA' evaluates the statistics once and stops at the first
    -- 'Nothing', instead of computing the whole list twice.
    columnStats :: T.Text -> DataFrame -> DataFrame
    columnStats name acc = case sequenceA (stats name) of
        Just values ->
            insertUnboxedVector name (VU.fromList (map (roundTo 2) values)) acc
        Nothing -> acc

    -- The ten statistics of one column, in the order of 'labels'.
    stats :: T.Text -> [Maybe Double]
    stats name =
        [ count
        , mean name df
        , quantileAt 0
        , firstQuartile
        , quantileAt 2
        , thirdQuartile
        , quantileAt 4
        , standardDeviation name df
        , (-) <$> thirdQuartile <*> firstQuartile
        , skewness name df
        ]
      where
        -- Number of elements of the column as stored in the input frame.
        count = fromIntegral . numElements <$> getColumn name df
        -- Quantiles 0..4 out of 4: minimum, quartiles, median and maximum.
        quantiles =
            applyStatistics (quantiles' (VU.fromList [0, 1, 2, 3, 4]) 4) name df
        quantileAt :: Int -> Maybe Double
        quantileAt i = (VG.! i) <$> quantiles
        firstQuartile = quantileAt 1
        thirdQuartile = quantileAt 3

{- | Round a @Double@ to the specified precision.

@n@ is the number of decimal places to keep. A negative @n@ rounds to the
left of the decimal point (@roundTo (-1) 123 == 120@). NaN and infinite
inputs are returned unchanged. Ties follow 'round', i.e. they go to the even
neighbour.
-}
roundTo :: Int -> Double -> Double
roundTo n x
    -- 'round' to 'Integer' is meaningless for non-finite values.
    | isNaN x || isInfinite x = x
    | n >= 0 = fromInteger (round (x * scale)) / scale
    -- Scale down rather than raising 10 to a negative power, which '^' rejects.
    | otherwise = fromInteger (round (x / scale)) * scale
  where
    scale :: Double
    scale = 10 ^ abs n

{- | Render a fraction as a percentage with two decimal places, e.g. @0.5@
becomes @"50.00%"@. Fractions below @0.00005@ are shown as @"<0.01%"@.
-}
toPct2dp :: Double -> String
toPct2dp x =
    if x < 0.00005
        then "<0.01%"
        else printf "%.2f%%" (100 * x)