{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Display.Terminal.Plot where
import Control.Monad
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl))
import Data.Typeable (Typeable)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import GHC.Stack (HasCallStack)
import Type.Reflection (typeRep)
import DataFrame.Internal.Column (Column (..), isNumeric)
import qualified DataFrame.Internal.Column as D
import DataFrame.Internal.DataFrame (DataFrame (..), getColumn)
import DataFrame.Operations.Core
import qualified DataFrame.Operations.Subset as D
import Granite
data PlotConfig = PlotConfig
{ PlotConfig -> PlotType
plotType :: PlotType
, PlotConfig -> Plot
plotSettings :: Plot
}
data PlotType
= Histogram
| Scatter
| Line
| Bar
| BoxPlot
| Pie
| StackedBar
| Heatmap
deriving (PlotType -> PlotType -> Bool
(PlotType -> PlotType -> Bool)
-> (PlotType -> PlotType -> Bool) -> Eq PlotType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: PlotType -> PlotType -> Bool
== :: PlotType -> PlotType -> Bool
$c/= :: PlotType -> PlotType -> Bool
/= :: PlotType -> PlotType -> Bool
Eq, Int -> PlotType -> ShowS
[PlotType] -> ShowS
PlotType -> String
(Int -> PlotType -> ShowS)
-> (PlotType -> String) -> ([PlotType] -> ShowS) -> Show PlotType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PlotType -> ShowS
showsPrec :: Int -> PlotType -> ShowS
$cshow :: PlotType -> String
show :: PlotType -> String
$cshowList :: [PlotType] -> ShowS
showList :: [PlotType] -> ShowS
Show)
defaultPlotConfig :: PlotType -> PlotConfig
defaultPlotConfig :: PlotType -> PlotConfig
defaultPlotConfig PlotType
ptype =
PlotConfig
{ plotType :: PlotType
plotType = PlotType
ptype
, plotSettings :: Plot
plotSettings = Plot
defPlot
}
plotHistogram :: (HasCallStack) => T.Text -> DataFrame -> IO ()
plotHistogram :: HasCallStack => Text -> DataFrame -> IO ()
plotHistogram Text
colName = HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotHistogramWith Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Histogram)
plotHistogramWith ::
(HasCallStack) => T.Text -> PlotConfig -> DataFrame -> IO ()
plotHistogramWith :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotHistogramWith Text
colName PlotConfig
config DataFrame
df = do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
(Double
minVal, Double
maxVal) = if [Double] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Double]
values then (Double
0, Double
1) else ([Double] -> Double
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [Double]
values, [Double] -> Double
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
values)
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ Bins -> [Double] -> Plot -> Text
histogram (Int -> Double -> Double -> Bins
bins Int
30 Double
minVal Double
maxVal) [Double]
values (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotScatter :: (HasCallStack) => T.Text -> T.Text -> DataFrame -> IO ()
plotScatter :: HasCallStack => Text -> Text -> DataFrame -> IO ()
plotScatter Text
xCol Text
yCol = HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotScatterWith Text
xCol Text
yCol (PlotType -> PlotConfig
defaultPlotConfig PlotType
Scatter)
plotScatterWith ::
(HasCallStack) => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotScatterWith :: HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotScatterWith Text
xCol Text
yCol PlotConfig
config DataFrame
df = do
let xVals :: [Double]
xVals = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
xCol DataFrame
df
yVals :: [Double]
yVals = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
yCol DataFrame
df
points :: [(Double, Double)]
points = [Double] -> [Double] -> [(Double, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Double]
xVals [Double]
yVals
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, [(Double, Double)])] -> Plot -> Text
scatter [(Text
xCol Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" vs " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
yCol, [(Double, Double)]
points)] (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotScatterBy ::
(HasCallStack) => T.Text -> T.Text -> T.Text -> DataFrame -> IO ()
plotScatterBy :: HasCallStack => Text -> Text -> Text -> DataFrame -> IO ()
plotScatterBy Text
xCol Text
yCol Text
grouping = HasCallStack =>
Text -> Text -> Text -> PlotConfig -> DataFrame -> IO ()
Text -> Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotScatterByWith Text
xCol Text
yCol Text
grouping (PlotType -> PlotConfig
defaultPlotConfig PlotType
Scatter)
plotScatterByWith ::
(HasCallStack) => T.Text -> T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotScatterByWith :: HasCallStack =>
Text -> Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotScatterByWith Text
xCol Text
yCol Text
grouping PlotConfig
config DataFrame
df = do
let vals :: [Text]
vals = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
grouping DataFrame
df
let df' :: DataFrame
df' = Text -> Column -> DataFrame -> DataFrame
insertColumn Text
grouping ([Text] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
D.fromList [Text]
vals) DataFrame
df
[(Text, [(Double, Double)])]
xs <- [Text]
-> (Text -> IO (Text, [(Double, Double)]))
-> IO [(Text, [(Double, Double)])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM ([Text] -> [Text]
forall a. Eq a => [a] -> [a]
L.nub [Text]
vals) ((Text -> IO (Text, [(Double, Double)]))
-> IO [(Text, [(Double, Double)])])
-> (Text -> IO (Text, [(Double, Double)]))
-> IO [(Text, [(Double, Double)])]
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let filtered :: DataFrame
filtered = Text -> (Text -> Bool) -> DataFrame -> DataFrame
forall a.
Columnable a =>
Text -> (a -> Bool) -> DataFrame -> DataFrame
D.filter Text
grouping (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
col) DataFrame
df'
xVals :: [Double]
xVals = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
xCol DataFrame
filtered
yVals :: [Double]
yVals = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
yCol DataFrame
filtered
points :: [(Double, Double)]
points = [Double] -> [Double] -> [(Double, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Double]
xVals [Double]
yVals
(Text, [(Double, Double)]) -> IO (Text, [(Double, Double)])
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text
col, [(Double, Double)]
points)
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, [(Double, Double)])] -> Plot -> Text
scatter [(Text, [(Double, Double)])]
xs (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotLines :: (HasCallStack) => T.Text -> [T.Text] -> DataFrame -> IO ()
plotLines :: HasCallStack => Text -> [Text] -> DataFrame -> IO ()
plotLines Text
xAxis [Text]
colNames = HasCallStack => Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
plotLinesWith Text
xAxis [Text]
colNames (PlotType -> PlotConfig
defaultPlotConfig PlotType
Line)
plotLinesWith ::
(HasCallStack) => T.Text -> [T.Text] -> PlotConfig -> DataFrame -> IO ()
plotLinesWith :: HasCallStack => Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
plotLinesWith Text
xAxis [Text]
colNames PlotConfig
config DataFrame
df = do
[(Text, [(Double, Double)])]
seriesData <- [Text]
-> (Text -> IO (Text, [(Double, Double)]))
-> IO [(Text, [(Double, Double)])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
colNames ((Text -> IO (Text, [(Double, Double)]))
-> IO [(Text, [(Double, Double)])])
-> (Text -> IO (Text, [(Double, Double)]))
-> IO [(Text, [(Double, Double)])]
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col DataFrame
df
indices :: [Double]
indices = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
xAxis DataFrame
df
(Text, [(Double, Double)]) -> IO (Text, [(Double, Double)])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
col, [Double] -> [Double] -> [(Double, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Double]
indices [Double]
values)
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, [(Double, Double)])] -> Plot -> Text
lineGraph [(Text, [(Double, Double)])]
seriesData (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotBoxPlots :: (HasCallStack) => [T.Text] -> DataFrame -> IO ()
plotBoxPlots :: HasCallStack => [Text] -> DataFrame -> IO ()
plotBoxPlots [Text]
colNames = HasCallStack => [Text] -> PlotConfig -> DataFrame -> IO ()
[Text] -> PlotConfig -> DataFrame -> IO ()
plotBoxPlotsWith [Text]
colNames (PlotType -> PlotConfig
defaultPlotConfig PlotType
BoxPlot)
plotBoxPlotsWith ::
(HasCallStack) => [T.Text] -> PlotConfig -> DataFrame -> IO ()
plotBoxPlotsWith :: HasCallStack => [Text] -> PlotConfig -> DataFrame -> IO ()
plotBoxPlotsWith [Text]
colNames PlotConfig
config DataFrame
df = do
[(Text, [Double])]
boxData <- [Text] -> (Text -> IO (Text, [Double])) -> IO [(Text, [Double])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
colNames ((Text -> IO (Text, [Double])) -> IO [(Text, [Double])])
-> (Text -> IO (Text, [Double])) -> IO [(Text, [Double])]
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col DataFrame
df
(Text, [Double]) -> IO (Text, [Double])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
col, [Double]
values)
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, [Double])] -> Plot -> Text
boxPlot [(Text, [Double])]
boxData (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotStackedBars :: (HasCallStack) => T.Text -> [T.Text] -> DataFrame -> IO ()
plotStackedBars :: HasCallStack => Text -> [Text] -> DataFrame -> IO ()
plotStackedBars Text
categoryCol [Text]
valueColumns = HasCallStack => Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
plotStackedBarsWith Text
categoryCol [Text]
valueColumns (PlotType -> PlotConfig
defaultPlotConfig PlotType
StackedBar)
plotStackedBarsWith ::
(HasCallStack) => T.Text -> [T.Text] -> PlotConfig -> DataFrame -> IO ()
plotStackedBarsWith :: HasCallStack => Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
plotStackedBarsWith Text
categoryCol [Text]
valueColumns PlotConfig
config DataFrame
df = do
let categories :: [Text]
categories = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
categoryCol DataFrame
df
uniqueCategories :: [Text]
uniqueCategories = [Text] -> [Text]
forall a. Eq a => [a] -> [a]
L.nub [Text]
categories
[(Text, [(Text, Double)])]
stackData <- [Text]
-> (Text -> IO (Text, [(Text, Double)]))
-> IO [(Text, [(Text, Double)])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
uniqueCategories ((Text -> IO (Text, [(Text, Double)]))
-> IO [(Text, [(Text, Double)])])
-> (Text -> IO (Text, [(Text, Double)]))
-> IO [(Text, [(Text, Double)])]
forall a b. (a -> b) -> a -> b
$ \Text
cat -> do
let indices :: [Int]
indices = [Int
i | (Int
i, Text
c) <- [Int] -> [Text] -> [(Int, Text)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0 ..] [Text]
categories, Text
c Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
cat]
[(Text, Double)]
seriesData <- [Text] -> (Text -> IO (Text, Double)) -> IO [(Text, Double)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
valueColumns ((Text -> IO (Text, Double)) -> IO [(Text, Double)])
-> (Text -> IO (Text, Double)) -> IO [(Text, Double)]
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let allValues :: [Double]
allValues = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col DataFrame
df
values :: [Double]
values = [[Double]
allValues [Double] -> Int -> Double
forall a. HasCallStack => [a] -> Int -> a
!! Int
i | Int
i <- [Int]
indices, Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
allValues]
(Text, Double) -> IO (Text, Double)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
col, [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
values)
(Text, [(Text, Double)]) -> IO (Text, [(Text, Double)])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
cat, [(Text, Double)]
seriesData)
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, [(Text, Double)])] -> Plot -> Text
stackedBars [(Text, [(Text, Double)])]
stackData (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotHeatmap :: (HasCallStack) => DataFrame -> IO ()
plotHeatmap :: HasCallStack => DataFrame -> IO ()
plotHeatmap = HasCallStack => PlotConfig -> DataFrame -> IO ()
PlotConfig -> DataFrame -> IO ()
plotHeatmapWith (PlotType -> PlotConfig
defaultPlotConfig PlotType
Heatmap)
plotHeatmapWith :: (HasCallStack) => PlotConfig -> DataFrame -> IO ()
plotHeatmapWith :: HasCallStack => PlotConfig -> DataFrame -> IO ()
plotHeatmapWith PlotConfig
config DataFrame
df = do
let numericCols :: [Text]
numericCols = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (DataFrame -> Text -> Bool
isNumericColumn DataFrame
df) (DataFrame -> [Text]
columnNames DataFrame
df)
matrix :: [[Double]]
matrix = (Text -> [Double]) -> [Text] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map (HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
`extractNumericColumn` DataFrame
df) [Text]
numericCols
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [[Double]] -> Plot -> Text
heatmap [[Double]]
matrix (PlotConfig -> Plot
plotSettings PlotConfig
config)
isNumericColumn :: DataFrame -> T.Text -> Bool
isNumericColumn :: DataFrame -> Text -> Bool
isNumericColumn DataFrame
df Text
colName = Bool -> (Column -> Bool) -> Maybe Column -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False Column -> Bool
isNumeric (Text -> DataFrame -> Maybe Column
getColumn Text
colName DataFrame
df)
plotAllHistograms :: (HasCallStack) => DataFrame -> IO ()
plotAllHistograms :: HasCallStack => DataFrame -> IO ()
plotAllHistograms DataFrame
df = do
let numericCols :: [Text]
numericCols = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (DataFrame -> Text -> Bool
isNumericColumn DataFrame
df) (DataFrame -> [Text]
columnNames DataFrame
df)
[Text] -> (Text -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Text]
numericCols ((Text -> IO ()) -> IO ()) -> (Text -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
Text -> IO ()
T.putStrLn Text
col
HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
plotHistogram Text
col DataFrame
df
plotCorrelationMatrix :: (HasCallStack) => DataFrame -> IO ()
plotCorrelationMatrix :: HasCallStack => DataFrame -> IO ()
plotCorrelationMatrix DataFrame
df = do
let numericCols :: [Text]
numericCols = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (DataFrame -> Text -> Bool
isNumericColumn DataFrame
df) (DataFrame -> [Text]
columnNames DataFrame
df)
let correlations :: [[Double]]
correlations =
(Text -> [Double]) -> [Text] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map
( \Text
col1 ->
(Text -> Double) -> [Text] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map
( \Text
col2 ->
let
vals1 :: [Double]
vals1 = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col1 DataFrame
df
vals2 :: [Double]
vals2 = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col2 DataFrame
df
in
[Double] -> [Double] -> Double
forall {a}. Floating a => [a] -> [a] -> a
correlation [Double]
vals1 [Double]
vals2
)
[Text]
numericCols
)
[Text]
numericCols
[(Integer, Text)] -> IO ()
forall a. Show a => a -> IO ()
print ([Integer] -> [Text] -> [(Integer, Text)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Text]
numericCols)
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [[Double]] -> Plot -> Text
heatmap [[Double]]
correlations (Plot
defPlot{plotTitle = "Correlation Matrix"})
where
correlation :: [a] -> [a] -> a
correlation [a]
xs [a]
ys =
let n :: a
n = Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> a) -> Int -> a
forall a b. (a -> b) -> a -> b
$ [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs
meanX :: a
meanX = [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [a]
xs a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
meanY :: a
meanY = [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [a]
ys a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
covXY :: a
covXY = [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
meanX) a -> a -> a
forall a. Num a => a -> a -> a
* (a
y a -> a -> a
forall a. Num a => a -> a -> a
- a
meanY) | (a
x, a
y) <- [a] -> [a] -> [(a, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [a]
xs [a]
ys] a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
stdX :: a
stdX = a -> a
forall a. Floating a => a -> a
sqrt (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
meanX) a -> Integer -> a
forall a b. (Num a, Integral b) => a -> b -> a
^ Integer
2 | a
x <- [a]
xs] a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
stdY :: a
stdY = a -> a
forall a. Floating a => a -> a
sqrt (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
y a -> a -> a
forall a. Num a => a -> a -> a
- a
meanY) a -> Integer -> a
forall a b. (Num a, Integral b) => a -> b -> a
^ Integer
2 | a
y <- [a]
ys] a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
in a
covXY a -> a -> a
forall a. Fractional a => a -> a -> a
/ (a
stdX a -> a -> a
forall a. Num a => a -> a -> a
* a
stdY)
plotBars :: (HasCallStack) => T.Text -> DataFrame -> IO ()
plotBars :: HasCallStack => Text -> DataFrame -> IO ()
plotBars Text
colName = HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotBarsWith Text
colName Maybe Text
forall a. Maybe a
Nothing (PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar)
plotBarsWith ::
(HasCallStack) => T.Text -> Maybe T.Text -> PlotConfig -> DataFrame -> IO ()
plotBarsWith :: HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotBarsWith Text
colName Maybe Text
groupByCol PlotConfig
config DataFrame
df =
case Maybe Text
groupByCol of
Maybe Text
Nothing -> HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotSingleBars Text
colName PlotConfig
config DataFrame
df
Just Text
grpCol -> HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWith Text
grpCol Text
colName PlotConfig
config DataFrame
df
plotSingleBars :: (HasCallStack) => T.Text -> PlotConfig -> DataFrame -> IO ()
plotSingleBars :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotSingleBars Text
colName PlotConfig
config DataFrame
df = do
let barData :: Maybe [(Text, Double)]
barData = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
barData of
Just [(Text, Double)]
counts -> do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
10 [(Text, Double)]
counts
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(Text, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
if [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
20
then do
let labels :: [Text]
labels = (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Text
"Item " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
i)) [Int
1 .. [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
paired :: [(Text, Double)]
paired = [Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
labels [Double]
values
grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
10 [(Text, Double)]
paired
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let labels :: [Text]
labels = (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Text
"Item " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
i)) [Int
1 .. [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars ([Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
labels [Double]
values) (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotBarsTopN :: (HasCallStack) => Int -> T.Text -> DataFrame -> IO ()
plotBarsTopN :: HasCallStack => Int -> Text -> DataFrame -> IO ()
plotBarsTopN Int
n Text
colName = HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotBarsTopNWith Int
n Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar)
plotBarsTopNWith ::
(HasCallStack) => Int -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotBarsTopNWith :: HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotBarsTopNWith Int
n Text
colName PlotConfig
config DataFrame
df = do
let barData :: Maybe [(Text, Double)]
barData = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
barData of
Just [(Text, Double)]
counts -> do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
n [(Text, Double)]
counts
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(Text, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
labels :: [Text]
labels = (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Text
"Item " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
i)) [Int
1 .. [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
paired :: [(Text, Double)]
paired = [Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
labels [Double]
values
grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
n [(Text, Double)]
paired
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotGroupedBarsWith ::
(HasCallStack) => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWith :: HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWith = HasCallStack =>
Int -> Text -> Text -> PlotConfig -> DataFrame -> IO ()
Int -> Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWithN Int
10
plotGroupedBarsWithN ::
(HasCallStack) => Int -> T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWithN :: HasCallStack =>
Int -> Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWithN Int
n Text
groupCol Text
valCol PlotConfig
config DataFrame
df = do
let colIsNumeric :: Bool
colIsNumeric = Text -> DataFrame -> Bool
isNumericColumnCheck Text
valCol DataFrame
df
if Bool
colIsNumeric
then do
let groups :: [Text]
groups = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
groupCol DataFrame
df
values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
valCol DataFrame
df
m :: Map Text Double
m = (Double -> Double -> Double) -> [(Text, Double)] -> Map Text Double
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) ([Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
groups [Double]
values)
grouped :: [(Text, Double)]
grouped = (Text -> (Text, Double)) -> [Text] -> [(Text, Double)]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
v -> (Text
v, Map Text Double
m Map Text Double -> Text -> Double
forall k a. Ord k => Map k a -> k -> a
M.! Text
v)) [Text]
groups
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let groups :: [Text]
groups = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
groupCol DataFrame
df
vals :: [Text]
vals = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
valCol DataFrame
df
pairs :: [(Text, Text)]
pairs = [Text] -> [Text] -> [(Text, Text)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
groups [Text]
vals
counts :: [(Text, Integer)]
counts =
Map Text Integer -> [(Text, Integer)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Integer -> [(Text, Integer)])
-> Map Text Integer -> [(Text, Integer)]
forall a b. (a -> b) -> a -> b
$
(Integer -> Integer -> Integer)
-> [(Text, Integer)] -> Map Text Integer
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith
Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
(+)
[(Text
g Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" - " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
v, Integer
1) | (Text
g, Text
v) <- [(Text, Text)]
pairs]
finalCounts :: [(Text, Double)]
finalCounts = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
n [(Text
k, Integer -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
v) | (Text
k, Integer
v) <- [(Text, Integer)]
counts]
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
finalCounts (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotValueCounts :: (HasCallStack) => T.Text -> DataFrame -> IO ()
plotValueCounts :: HasCallStack => Text -> DataFrame -> IO ()
plotValueCounts Text
colName = HasCallStack => Text -> Int -> PlotConfig -> DataFrame -> IO ()
Text -> Int -> PlotConfig -> DataFrame -> IO ()
plotValueCountsWith Text
colName Int
10 (PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar)
plotValueCountsWith ::
(HasCallStack) => T.Text -> Int -> PlotConfig -> DataFrame -> IO ()
plotValueCountsWith :: HasCallStack => Text -> Int -> PlotConfig -> DataFrame -> IO ()
plotValueCountsWith Text
colName Int
maxBars PlotConfig
config DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
maxBars [(Text, Double)]
c
config' :: PlotConfig
config' =
PlotConfig
config
{ plotSettings =
(plotSettings config)
{ plotTitle =
if T.null (plotTitle (plotSettings config))
then "Value counts for " <> colName
else plotTitle (plotSettings config)
}
}
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config')
Maybe [(Text, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Could not get value counts for column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName
plotBarsWithPercentages :: (HasCallStack) => T.Text -> DataFrame -> IO ()
plotBarsWithPercentages :: HasCallStack => Text -> DataFrame -> IO ()
plotBarsWithPercentages Text
colName DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
c)
percentages :: [(Text, Double)]
percentages =
[ (Text
label Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"%)", Double
val)
| (Text
label, Double
val) <- [(Text, Double)]
c
]
grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
10 [(Text, Double)]
percentages
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (Plot
defPlot{plotTitle = "Distribution of " <> colName})
Maybe [(Text, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Could not get value counts for column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName
smartPlotBars :: (HasCallStack) => T.Text -> DataFrame -> IO ()
smartPlotBars :: HasCallStack => Text -> DataFrame -> IO ()
smartPlotBars Text
colName DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> do
let numUnique :: Int
numUnique = [(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
c
config :: PlotConfig
config =
(PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar)
{ plotSettings =
(plotSettings (defaultPlotConfig Bar))
{ plotTitle = colName <> " (" <> T.pack (show numUnique) <> " unique values)"
}
}
if Int
numUnique Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
12
then Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
c (PlotConfig -> Plot
plotSettings PlotConfig
config)
else
if Int
numUnique Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
20
then do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
12 [(Text, Double)]
c
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
10 [(Text, Double)]
c
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
bars [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(Text, Double)]
Nothing -> HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
plotBars Text
colName DataFrame
df
plotCategoricalSummary :: (HasCallStack) => DataFrame -> IO ()
plotCategoricalSummary :: HasCallStack => DataFrame -> IO ()
plotCategoricalSummary DataFrame
df = do
let cols :: [Text]
cols = DataFrame -> [Text]
columnNames DataFrame
df
[Text] -> (Text -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Text]
cols ((Text -> IO ()) -> IO ()) -> (Text -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
col DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
let numUnique :: Int
numUnique = [(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
c
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
String
"\n=== " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
col String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
numUnique String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" unique values) ==="
if Int
numUnique Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
15 then HasCallStack => Int -> Text -> DataFrame -> IO ()
Int -> Text -> DataFrame -> IO ()
plotBarsTopN Int
10 Text
col DataFrame
df else HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
plotBars Text
col DataFrame
df
Maybe [(Text, Double)]
Nothing -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
getCategoricalCounts ::
(HasCallStack) => T.Text -> DataFrame -> Maybe [(T.Text, Double)]
getCategoricalCounts :: HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> String -> Maybe [(Text, Double)]
forall a. HasCallStack => String -> a
error (String -> Maybe [(Text, Double)])
-> String -> Maybe [(Text, Double)]
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" not found"
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn Vector a
vec ->
let counts :: [(a, Int)]
counts = Vector a -> [(a, Int)]
forall a. (Ord a, Show a) => Vector a -> [(a, Int)]
countValues Vector a
vec
in [(Text, Double)] -> Maybe [(Text, Double)]
forall a. a -> Maybe a
Just [(String -> Text
T.pack (a -> String
forall a. Show a => a -> String
show a
k), Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
v) | (a
k, Int
v) <- [(a, Int)]
counts]
UnboxedColumn Vector a
vec ->
let counts :: [(a, Int)]
counts = Vector a -> [(a, Int)]
forall a. (Ord a, Show a, Unbox a) => Vector a -> [(a, Int)]
countValuesUnboxed Vector a
vec
in [(Text, Double)] -> Maybe [(Text, Double)]
forall a. a -> Maybe a
Just [(String -> Text
T.pack (a -> String
forall a. Show a => a -> String
show a
k), Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
v) | (a
k, Int
v) <- [(a, Int)]
counts]
OptionalColumn Vector (Maybe a)
vec ->
let counts :: [(Maybe a, Int)]
counts = Vector (Maybe a) -> [(Maybe a, Int)]
forall a. (Ord a, Show a) => Vector a -> [(a, Int)]
countValues Vector (Maybe a)
vec
in [(Text, Double)] -> Maybe [(Text, Double)]
forall a. a -> Maybe a
Just [(String -> Text
T.pack (Maybe a -> String
forall a. Show a => a -> String
show Maybe a
k), Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
v) | (Maybe a
k, Int
v) <- [(Maybe a, Int)]
counts]
where
countValues :: (Ord a, Show a) => V.Vector a -> [(a, Int)]
countValues :: forall a. (Ord a, Show a) => Vector a -> [(a, Int)]
countValues Vector a
vec = Map a Int -> [(a, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map a Int -> [(a, Int)]) -> Map a Int -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ (a -> Map a Int -> Map a Int) -> Map a Int -> Vector a -> Map a Int
forall a b. (a -> b -> b) -> b -> Vector a -> b
V.foldr' (\a
x Map a Int
acc -> (Int -> Int -> Int) -> a -> Int -> Map a Int -> Map a Int
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) a
x Int
1 Map a Int
acc) Map a Int
forall k a. Map k a
M.empty Vector a
vec
countValuesUnboxed :: (Ord a, Show a, VU.Unbox a) => VU.Vector a -> [(a, Int)]
countValuesUnboxed :: forall a. (Ord a, Show a, Unbox a) => Vector a -> [(a, Int)]
countValuesUnboxed Vector a
vec = Map a Int -> [(a, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map a Int -> [(a, Int)]) -> Map a Int -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ (a -> Map a Int -> Map a Int) -> Map a Int -> Vector a -> Map a Int
forall a b. Unbox a => (a -> b -> b) -> b -> Vector a -> b
VU.foldr' (\a
x Map a Int
acc -> (Int -> Int -> Int) -> a -> Int -> Map a Int -> Map a Int
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) a
x Int
1 Map a Int
acc) Map a Int
forall k a. Map k a
M.empty Vector a
vec
isNumericColumnCheck :: T.Text -> DataFrame -> Bool
isNumericColumnCheck :: Text -> DataFrame -> Bool
isNumericColumnCheck Text
colName DataFrame
df = DataFrame -> Text -> Bool
isNumericColumn DataFrame
df Text
colName
extractStringColumn :: (HasCallStack) => T.Text -> DataFrame -> [T.Text]
extractStringColumn :: HasCallStack => Text -> DataFrame -> [Text]
extractStringColumn Text
colName DataFrame
df =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> String -> [Text]
forall a. HasCallStack => String -> a
error (String -> [Text]) -> String -> [Text]
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" not found"
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn Vector a
vec -> Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList (Vector Text -> [Text]) -> Vector Text -> [Text]
forall a b. (a -> b) -> a -> b
$ (a -> Text) -> Vector a -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) Vector a
vec
UnboxedColumn Vector a
vec -> Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList (Vector Text -> [Text]) -> Vector Text -> [Text]
forall a b. (a -> b) -> a -> b
$ (a -> Text) -> Vector a -> Vector Text
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map (String -> Text
T.pack (String -> Text) -> (a -> String) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> String
forall a. Show a => a -> String
show) (Vector a -> Vector a
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VG.convert Vector a
vec)
OptionalColumn Vector (Maybe a)
vec -> Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList (Vector Text -> [Text]) -> Vector Text -> [Text]
forall a b. (a -> b) -> a -> b
$ (Maybe a -> Text) -> Vector (Maybe a) -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (String -> Text
T.pack (String -> Text) -> (Maybe a -> String) -> Maybe a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe a -> String
forall a. Show a => a -> String
show) Vector (Maybe a)
vec
extractNumericColumn :: (HasCallStack) => T.Text -> DataFrame -> [Double]
extractNumericColumn :: HasCallStack => Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> String -> [Double]
forall a. HasCallStack => String -> a
error (String -> [Double]) -> String -> [Double]
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" not found"
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn Vector a
vec -> Vector a -> [Double]
forall a. (Typeable a, Show a) => Vector a -> [Double]
vectorToDoubles Vector a
vec
UnboxedColumn Vector a
vec -> Vector a -> [Double]
forall a. (Typeable a, Unbox a, Show a) => Vector a -> [Double]
unboxedVectorToDoubles Vector a
vec
Column
_ -> []
vectorToDoubles :: forall a. (Typeable a, Show a) => V.Vector a -> [Double]
vectorToDoubles :: forall a. (Typeable a, Show a) => Vector a -> [Double]
vectorToDoubles Vector a
vec =
case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
Just a :~: Double
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList Vector a
Vector Double
vec
Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
Just a :~: Int
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Vector a
vec
Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Integer -> Maybe (a :~: Integer)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Integer) of
Just a :~: Integer
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Vector a
vec
Maybe (a :~: Integer)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
Just a :~: Float
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map a -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac Vector a
vec
Maybe (a :~: Float)
Nothing -> String -> [Double]
forall a. HasCallStack => String -> a
error (String -> [Double]) -> String -> [Double]
forall a b. (a -> b) -> a -> b
$ String
"Column is not numeric (type: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"
unboxedVectorToDoubles ::
forall a. (Typeable a, VU.Unbox a, Show a) => VU.Vector a -> [Double]
unboxedVectorToDoubles :: forall a. (Typeable a, Unbox a, Show a) => Vector a -> [Double]
unboxedVectorToDoubles Vector a
vec =
case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
Just a :~: Double
Refl -> Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector a
Vector Double
vec
Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
Just a :~: Int
Refl -> Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
VU.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Vector a
vec
Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
Just a :~: Float
Refl -> Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
VU.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map a -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac Vector a
vec
Maybe (a :~: Float)
Nothing -> String -> [Double]
forall a. HasCallStack => String -> a
error (String -> [Double]) -> String -> [Double]
forall a b. (a -> b) -> a -> b
$ String
"Column is not numeric (type: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"
groupWithOther :: Int -> [(T.Text, Double)] -> [(T.Text, Double)]
groupWithOther :: Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOther Int
n [(Text, Double)]
items =
let sorted :: [(Text, Double)]
sorted = ((Text, Double) -> Double) -> [(Text, Double)] -> [(Text, Double)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
L.sortOn (Double -> Double
forall a. Num a => a -> a
negate (Double -> Double)
-> ((Text, Double) -> Double) -> (Text, Double) -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text, Double) -> Double
forall a b. (a, b) -> b
snd) [(Text, Double)]
items
([(Text, Double)]
topN, [(Text, Double)]
rest) = Int -> [(Text, Double)] -> ([(Text, Double)], [(Text, Double)])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [(Text, Double)]
sorted
otherSum :: Double
otherSum = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
rest)
result :: [(Text, Double)]
result =
if [(Text, Double)] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(Text, Double)]
rest Bool -> Bool -> Bool
|| Double
otherSum Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
0
then [(Text, Double)]
topN
else [(Text, Double)]
topN [(Text, Double)] -> [(Text, Double)] -> [(Text, Double)]
forall a. [a] -> [a] -> [a]
++ [(Text
"Other (" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show ([(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
rest)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" items)", Double
otherSum)]
in [(Text, Double)]
result
plotPie :: (HasCallStack) => T.Text -> Maybe T.Text -> DataFrame -> IO ()
plotPie :: HasCallStack => Text -> Maybe Text -> DataFrame -> IO ()
plotPie Text
valCol Maybe Text
labelCol = HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotPieWith Text
valCol Maybe Text
labelCol (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie)
plotPieWith ::
(HasCallStack) => T.Text -> Maybe T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieWith :: HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotPieWith Text
valCol Maybe Text
labelCol PlotConfig
config DataFrame
df = do
let categoricalData :: Maybe [(Text, Double)]
categoricalData = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
valCol DataFrame
df
case Maybe [(Text, Double)]
categoricalData of
Just [(Text, Double)]
counts -> do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
8 [(Text, Double)]
counts
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(Text, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
valCol DataFrame
df
labels :: [Text]
labels = case Maybe Text
labelCol of
Maybe Text
Nothing -> (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Text
"Item " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
i)) [Int
1 .. [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
Just Text
lCol -> HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
lCol DataFrame
df
let pieData :: [(Text, Double)]
pieData = [Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
labels [Double]
values
grouped :: [(Text, Double)]
grouped =
if [(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
pieData Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10
then Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
8 [(Text, Double)]
pieData
else [(Text, Double)]
pieData
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
groupWithOtherForPie :: Int -> [(T.Text, Double)] -> [(T.Text, Double)]
groupWithOtherForPie :: Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
n [(Text, Double)]
items =
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
items)
sorted :: [(Text, Double)]
sorted = ((Text, Double) -> Double) -> [(Text, Double)] -> [(Text, Double)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
L.sortOn (Double -> Double
forall a. Num a => a -> a
negate (Double -> Double)
-> ((Text, Double) -> Double) -> (Text, Double) -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text, Double) -> Double
forall a b. (a, b) -> b
snd) [(Text, Double)]
items
([(Text, Double)]
topN, [(Text, Double)]
rest) = Int -> [(Text, Double)] -> ([(Text, Double)], [(Text, Double)])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [(Text, Double)]
sorted
otherSum :: Double
otherSum = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
rest)
otherPct :: Int
otherPct = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
otherSum Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int
result :: [(Text, Double)]
result =
if [(Text, Double)] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(Text, Double)]
rest Bool -> Bool -> Bool
|| Double
otherSum Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
0
then [(Text, Double)]
topN
else
[(Text, Double)]
topN
[(Text, Double)] -> [(Text, Double)] -> [(Text, Double)]
forall a. [a] -> [a] -> [a]
++ [
( Text
"Other ("
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show ([(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
rest))
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" items, "
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
otherPct)
Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"%)"
, Double
otherSum
)
]
in [(Text, Double)]
result
plotPieWithPercentages :: (HasCallStack) => T.Text -> DataFrame -> IO ()
plotPieWithPercentages :: HasCallStack => Text -> DataFrame -> IO ()
plotPieWithPercentages Text
colName = HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotPieWithPercentagesConfig Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie)
plotPieWithPercentagesConfig ::
(HasCallStack) => T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieWithPercentagesConfig :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotPieWithPercentagesConfig Text
colName PlotConfig
config DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
c)
withPct :: [(Text, Double)]
withPct =
[ (Text
label Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"%)", Double
val)
| (Text
label, Double
val) <- [(Text, Double)]
c
]
grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
8 [(Text, Double)]
withPct
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(Text, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
values
labels :: [Text]
labels = (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Text
"Item " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
i)) [Int
1 .. [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
withPct :: [(Text, Double)]
withPct =
[ (Text
label Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"%)", Double
val)
| (Text
label, Double
val) <- [Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
labels [Double]
values
]
grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
8 [(Text, Double)]
withPct
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotPieTopN :: (HasCallStack) => Int -> T.Text -> DataFrame -> IO ()
plotPieTopN :: HasCallStack => Int -> Text -> DataFrame -> IO ()
plotPieTopN Int
n Text
colName = HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieTopNWith Int
n Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie)
plotPieTopNWith ::
(HasCallStack) => Int -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieTopNWith :: HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieTopNWith Int
n Text
colName PlotConfig
config DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
n [(Text, Double)]
c
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(Text, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
labels :: [Text]
labels = (Int -> Text) -> [Int] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Text
"Item " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
i)) [Int
1 .. [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
paired :: [(Text, Double)]
paired = [Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
labels [Double]
values
grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
n [(Text, Double)]
paired
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
smartPlotPie :: (HasCallStack) => T.Text -> DataFrame -> IO ()
smartPlotPie :: HasCallStack => Text -> DataFrame -> IO ()
smartPlotPie Text
colName DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
c)
significant :: [(Text, Double)]
significant = ((Text, Double) -> Bool) -> [(Text, Double)] -> [(Text, Double)]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(Text
_, Double
v) -> Double
v Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
0.01) [(Text, Double)]
c
config :: PlotConfig
config =
(PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie)
{ plotSettings =
(plotSettings (defaultPlotConfig Pie)){plotTitle = colName <> " Distribution"}
}
if [(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
significant Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
6
then Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
significant (PlotConfig -> Plot
plotSettings PlotConfig
config)
else
if [(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
significant Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
10
then do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
8 [(Text, Double)]
c
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let grouped :: [(Text, Double)]
grouped = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
6 [(Text, Double)]
c
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(Text, Double)]
Nothing -> HasCallStack => Text -> Maybe Text -> DataFrame -> IO ()
Text -> Maybe Text -> DataFrame -> IO ()
plotPie Text
colName Maybe Text
forall a. Maybe a
Nothing DataFrame
df
plotPieGrouped :: (HasCallStack) => T.Text -> T.Text -> DataFrame -> IO ()
plotPieGrouped :: HasCallStack => Text -> Text -> DataFrame -> IO ()
plotPieGrouped Text
groupCol Text
valCol = HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieGroupedWith Text
groupCol Text
valCol (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie)
plotPieGroupedWith ::
(HasCallStack) => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieGroupedWith :: HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieGroupedWith Text
groupCol Text
valCol PlotConfig
config DataFrame
df = do
let colIsNumeric :: Bool
colIsNumeric = Text -> DataFrame -> Bool
isNumericColumnCheck Text
valCol DataFrame
df
if Bool
colIsNumeric
then do
let groups :: [Text]
groups = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
groupCol DataFrame
df
values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
valCol DataFrame
df
grouped :: [(Text, Double)]
grouped = Map Text Double -> [(Text, Double)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Double -> [(Text, Double)])
-> Map Text Double -> [(Text, Double)]
forall a b. (a -> b) -> a -> b
$ (Double -> Double -> Double) -> [(Text, Double)] -> Map Text Double
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) ([Text] -> [Double] -> [(Text, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
groups [Double]
values)
finalGroups :: [(Text, Double)]
finalGroups = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
8 [(Text, Double)]
grouped
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
finalGroups (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let groups :: [Text]
groups = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
groupCol DataFrame
df
vals :: [Text]
vals = HasCallStack => Text -> DataFrame -> [Text]
Text -> DataFrame -> [Text]
extractStringColumn Text
valCol DataFrame
df
combined :: [Text]
combined = (Text -> Text -> Text) -> [Text] -> [Text] -> [Text]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Text
g Text
v -> Text
g Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" - " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
v) [Text]
groups [Text]
vals
counts :: [(Text, Integer)]
counts = Map Text Integer -> [(Text, Integer)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Integer -> [(Text, Integer)])
-> Map Text Integer -> [(Text, Integer)]
forall a b. (a -> b) -> a -> b
$ (Integer -> Integer -> Integer)
-> [(Text, Integer)] -> Map Text Integer
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
(+) [(Text
c, Integer
1) | Text
c <- [Text]
combined]
finalCounts :: [(Text, Double)]
finalCounts = Int -> [(Text, Double)] -> [(Text, Double)]
groupWithOtherForPie Int
10 [(Text
k, Integer -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
v) | (Text
k, Integer
v) <- [(Text, Integer)]
counts]
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
finalCounts (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotPieComparison :: (HasCallStack) => [T.Text] -> DataFrame -> IO ()
plotPieComparison :: HasCallStack => [Text] -> DataFrame -> IO ()
plotPieComparison [Text]
cols DataFrame
df = [Text] -> (Text -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Text]
cols ((Text -> IO ()) -> IO ()) -> (Text -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
col DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 Bool -> Bool -> Bool
&& [(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
20) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"\n=== " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
col String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" Distribution ==="
HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
smartPlotPie Text
col DataFrame
df
Maybe [(Text, Double)]
Nothing -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
plotBinaryPie :: (HasCallStack) => T.Text -> DataFrame -> IO ()
plotBinaryPie :: HasCallStack => Text -> DataFrame -> IO ()
plotBinaryPie Text
colName DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c ->
if [(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2
then do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
c)
withPct :: [(Text, Double)]
withPct =
[ (Text
label Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"%)", Double
val)
| (Text
label, Double
val) <- [(Text, Double)]
c
]
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
withPct Plot
defPlot
else
String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
String
"Column "
String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName
String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" is not binary (has "
String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show ([(Text, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Text, Double)]
c)
String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" unique values)"
Maybe [(Text, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" is not categorical"
plotMarketShare :: (HasCallStack) => T.Text -> DataFrame -> IO ()
plotMarketShare :: HasCallStack => Text -> DataFrame -> IO ()
plotMarketShare Text
colName = HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotMarketShareWith Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie)
plotMarketShareWith ::
(HasCallStack) => T.Text -> PlotConfig -> DataFrame -> IO ()
plotMarketShareWith :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotMarketShareWith Text
colName PlotConfig
config DataFrame
df = do
let counts :: Maybe [(Text, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(Text, Double)]
Text -> DataFrame -> Maybe [(Text, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(Text, Double)]
counts of
Just [(Text, Double)]
c -> do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
c)
sorted :: [(Text, Double)]
sorted = ((Text, Double) -> Double) -> [(Text, Double)] -> [(Text, Double)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
L.sortOn (Double -> Double
forall a. Num a => a -> a
negate (Double -> Double)
-> ((Text, Double) -> Double) -> (Text, Double) -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text, Double) -> Double
forall a b. (a, b) -> b
snd) [(Text, Double)]
c
significantShares :: [(Text, Double)]
significantShares = ((Text, Double) -> Bool) -> [(Text, Double)] -> [(Text, Double)]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (\(Text
_, Double
v) -> Double
v Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
0.02) [(Text, Double)]
sorted
otherSum :: Double
otherSum = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double
v | (Text
_, Double
v) <- [(Text, Double)]
c, Double
v Double -> [Double] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ((Text, Double) -> Double) -> [(Text, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> Double
forall a b. (a, b) -> b
snd [(Text, Double)]
significantShares]
formatShare :: (Text, Double) -> (Text, Double)
formatShare (Text
label, Double
val) =
let pct :: Int
pct = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int
in (Text
label Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
pct) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"%)", Double
val)
shares :: [(Text, Double)]
shares = ((Text, Double) -> (Text, Double))
-> [(Text, Double)] -> [(Text, Double)]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Double) -> (Text, Double)
formatShare [(Text, Double)]
significantShares
finalShares :: [(Text, Double)]
finalShares =
if Double
otherSum Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
0 Bool -> Bool -> Bool
&& Double
otherSum Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
0.01
then [(Text, Double)]
shares [(Text, Double)] -> [(Text, Double)] -> [(Text, Double)]
forall a. Semigroup a => a -> a -> a
<> [(Text
"Others (<2% each)", Double
otherSum)]
else [(Text, Double)]
shares
let config' :: PlotConfig
config' =
PlotConfig
config
Text -> IO ()
T.putStrLn (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$ [(Text, Double)] -> Plot -> Text
pie [(Text, Double)]
finalShares (PlotConfig -> Plot
plotSettings PlotConfig
config')
Maybe [(Text, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" is not categorical"