| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
DataFrame.Operations.Statistics
Contents
Synopsis
- frequencies :: Columnable a => Expr a -> DataFrame -> DataFrame
- mean :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double
- meanMaybe :: (Columnable a, Real a) => Expr (Maybe a) -> DataFrame -> Double
- median :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double
- medianMaybe :: (Columnable a, Real a) => Expr (Maybe a) -> DataFrame -> Double
- percentile :: (Columnable a, Real a, Unbox a) => Int -> Expr a -> DataFrame -> Double
- genericPercentile :: (Columnable a, Ord a) => Int -> Expr a -> DataFrame -> a
- standardDeviation :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double
- skewness :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double
- variance :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double
- interQuartileRange :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double
- correlation :: Text -> Text -> DataFrame -> Maybe Double
- _getColumnAsDouble :: Text -> DataFrame -> Maybe (Vector Double)
- optionalToDoubleVector :: Real a => Vector (Maybe a) -> Vector Double
- sum :: (Columnable a, Num a) => Expr a -> DataFrame -> a
- imputeWith :: (ImputeOp a, Columnable (BaseType a)) => (Expr (BaseType a) -> Expr (BaseType a)) -> Expr a -> DataFrame -> DataFrame
- applyStatistic :: (Vector Double -> Double) -> Text -> DataFrame -> Maybe Double
- applyStatistics :: (Vector Double -> Vector Double) -> Text -> DataFrame -> Maybe (Vector Double)
- summarize :: DataFrame -> DataFrame
- roundTo :: Int -> Double -> Double
- toPct2dp :: Double -> String
Documentation
frequencies :: Columnable a => Expr a -> DataFrame -> DataFrame Source #
Show a frequency table for a categorical feature.
Examples:
ghci> df <- D.readCsv "./data/housing.csv"
ghci> D.frequencies "ocean_proximity" df
---------------------------------------------------------------------
Statistic | <1H OCEAN | INLAND | ISLAND | NEAR BAY | NEAR OCEAN
----------------|-----------|--------|--------|----------|-----------
Text | Any | Any | Any | Any | Any
----------------|-----------|--------|--------|----------|-----------
Count | 9136 | 6551 | 5 | 2290 | 2658
Percentage (%) | 44.26% | 31.74% | 0.02% | 11.09% | 12.88%
mean :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double Source #
Calculates the mean of a given column as a standalone value.
median :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double Source #
Calculates the median of a given column as a standalone value.
medianMaybe :: (Columnable a, Real a) => Expr (Maybe a) -> DataFrame -> Double Source #
Calculates the median of a given column (containing optional values) as a standalone value.
percentile :: (Columnable a, Real a, Unbox a) => Int -> Expr a -> DataFrame -> Double Source #
Calculates the nth percentile of a given column as a standalone value.
genericPercentile :: (Columnable a, Ord a) => Int -> Expr a -> DataFrame -> a Source #
Calculates the nth percentile of a given column of any orderable type as a standalone value of the column's own type.
standardDeviation :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double Source #
Calculates the standard deviation of a given column as a standalone value.
skewness :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double Source #
Calculates the skewness of a given column as a standalone value.
variance :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double Source #
Calculates the variance of a given column as a standalone value.
interQuartileRange :: (Columnable a, Real a, Unbox a) => Expr a -> DataFrame -> Double Source #
Calculates the inter-quartile range of a given column as a standalone value.
correlation :: Text -> Text -> DataFrame -> Maybe Double Source #
Calculates the Pearson correlation coefficient between two given columns as a standalone value.
sum :: (Columnable a, Num a) => Expr a -> DataFrame -> a Source #
Calculates the sum of a given column as a standalone value.
imputeWith :: (ImputeOp a, Columnable (BaseType a)) => (Expr (BaseType a) -> Expr (BaseType a)) -> Expr a -> DataFrame -> DataFrame Source #
applyStatistics :: (Vector Double -> Vector Double) -> Text -> DataFrame -> Maybe (Vector Double) Source #
Orphan instances
| Columnable b => ImputeOp (Maybe b) Source # | O(n) Impute missing values in a column using a derived scalar. Given
this function:
Throws
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified DataFrame.Functions as F
>>> let df =
... D.fromNamedColumns
... [ ("age", D.fromList [Just 10, Nothing, Just 20 :: Maybe Int]) ]
>>>
>>> -- Impute missing ages with the mean of the observed ages
>>> D.imputeWith F.mean "age" df
-- age
-- ----
-- 10
-- 15
-- 20
|