{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE GADTs #-}
module DataFrame.Display.Terminal.Plot where
import Control.Monad
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import Data.Maybe (fromMaybe, catMaybes)
import Data.Typeable (Typeable)
import Data.Type.Equality (type (:~:)(Refl), TestEquality(testEquality))
import Type.Reflection (typeRep)
import GHC.Stack (HasCallStack)
import DataFrame.Internal.Column (Column(..), Columnable)
import DataFrame.Internal.DataFrame (DataFrame(..))
import DataFrame.Operations.Core
import Granite
data PlotConfig = PlotConfig
{ PlotConfig -> PlotType
plotType :: PlotType
, PlotConfig -> String
plotTitle :: String
, PlotConfig -> Plot
plotSettings :: Plot
}
data PlotType
= Histogram'
| Scatter'
| Line'
| Bar'
| BoxPlot'
| Pie'
| StackedBar'
| Heatmap'
deriving (PlotType -> PlotType -> Bool
(PlotType -> PlotType -> Bool)
-> (PlotType -> PlotType -> Bool) -> Eq PlotType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: PlotType -> PlotType -> Bool
== :: PlotType -> PlotType -> Bool
$c/= :: PlotType -> PlotType -> Bool
/= :: PlotType -> PlotType -> Bool
Eq, Int -> PlotType -> ShowS
[PlotType] -> ShowS
PlotType -> String
(Int -> PlotType -> ShowS)
-> (PlotType -> String) -> ([PlotType] -> ShowS) -> Show PlotType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PlotType -> ShowS
showsPrec :: Int -> PlotType -> ShowS
$cshow :: PlotType -> String
show :: PlotType -> String
$cshowList :: [PlotType] -> ShowS
showList :: [PlotType] -> ShowS
Show)
defaultPlotConfig :: PlotType -> PlotConfig
defaultPlotConfig :: PlotType -> PlotConfig
defaultPlotConfig PlotType
ptype = PlotConfig
{ plotType :: PlotType
plotType = PlotType
ptype
, plotTitle :: String
plotTitle = String
""
, plotSettings :: Plot
plotSettings = Plot
defPlot
}
plotHistogram :: HasCallStack => T.Text -> DataFrame -> IO ()
plotHistogram :: HasCallStack => Text -> DataFrame -> IO ()
plotHistogram Text
colName DataFrame
df = HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotHistogramWith Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Histogram') DataFrame
df
plotHistogramWith :: HasCallStack => T.Text -> PlotConfig -> DataFrame -> IO ()
plotHistogramWith :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotHistogramWith Text
colName PlotConfig
config DataFrame
df = do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
(Double
minVal, Double
maxVal) = if [Double] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Double]
values then (Double
0, Double
1) else ([Double] -> Double
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [Double]
values, [Double] -> Double
forall a. Ord a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Double]
values)
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> Bins -> [Double] -> Plot -> String
histogram (PlotConfig -> String
plotTitle PlotConfig
config) (Int -> Double -> Double -> Bins
bins Int
30 Double
minVal Double
maxVal) [Double]
values (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotScatter :: HasCallStack => T.Text -> T.Text -> DataFrame -> IO ()
plotScatter :: HasCallStack => Text -> Text -> DataFrame -> IO ()
plotScatter Text
xCol Text
yCol DataFrame
df = HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotScatterWith Text
xCol Text
yCol (PlotType -> PlotConfig
defaultPlotConfig PlotType
Scatter') DataFrame
df
plotScatterWith :: HasCallStack => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotScatterWith :: HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotScatterWith Text
xCol Text
yCol PlotConfig
config DataFrame
df = do
let xVals :: [Double]
xVals = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
xCol DataFrame
df
yVals :: [Double]
yVals = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
yCol DataFrame
df
points :: [(Double, Double)]
points = [Double] -> [Double] -> [(Double, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Double]
xVals [Double]
yVals
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, [(Double, Double)])] -> Plot -> String
scatter (PlotConfig -> String
plotTitle PlotConfig
config) [(Text -> String
T.unpack Text
xCol String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" vs " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
yCol, [(Double, Double)]
points)] (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotLines :: HasCallStack => [T.Text] -> DataFrame -> IO ()
plotLines :: HasCallStack => [Text] -> DataFrame -> IO ()
plotLines [Text]
colNames DataFrame
df = HasCallStack => [Text] -> PlotConfig -> DataFrame -> IO ()
[Text] -> PlotConfig -> DataFrame -> IO ()
plotLinesWith [Text]
colNames (PlotType -> PlotConfig
defaultPlotConfig PlotType
Line') DataFrame
df
plotLinesWith :: HasCallStack => [T.Text] -> PlotConfig -> DataFrame -> IO ()
plotLinesWith :: HasCallStack => [Text] -> PlotConfig -> DataFrame -> IO ()
plotLinesWith [Text]
colNames PlotConfig
config DataFrame
df = do
[(String, [(Double, Double)])]
seriesData <- [Text]
-> (Text -> IO (String, [(Double, Double)]))
-> IO [(String, [(Double, Double)])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
colNames ((Text -> IO (String, [(Double, Double)]))
-> IO [(String, [(Double, Double)])])
-> (Text -> IO (String, [(Double, Double)]))
-> IO [(String, [(Double, Double)])]
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col DataFrame
df
indices :: [Double]
indices = (Int -> Double) -> [Int] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral [Int
0..[Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
(String, [(Double, Double)]) -> IO (String, [(Double, Double)])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> String
T.unpack Text
col, [Double] -> [Double] -> [(Double, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Double]
indices [Double]
values)
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, [(Double, Double)])] -> Plot -> String
lineGraph (PlotConfig -> String
plotTitle PlotConfig
config) [(String, [(Double, Double)])]
seriesData (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotBoxPlots :: HasCallStack => [T.Text] -> DataFrame -> IO ()
plotBoxPlots :: HasCallStack => [Text] -> DataFrame -> IO ()
plotBoxPlots [Text]
colNames DataFrame
df = HasCallStack => [Text] -> PlotConfig -> DataFrame -> IO ()
[Text] -> PlotConfig -> DataFrame -> IO ()
plotBoxPlotsWith [Text]
colNames (PlotType -> PlotConfig
defaultPlotConfig PlotType
BoxPlot') DataFrame
df
plotBoxPlotsWith :: HasCallStack => [T.Text] -> PlotConfig -> DataFrame -> IO ()
plotBoxPlotsWith :: HasCallStack => [Text] -> PlotConfig -> DataFrame -> IO ()
plotBoxPlotsWith [Text]
colNames PlotConfig
config DataFrame
df = do
[(String, [Double])]
boxData <- [Text]
-> (Text -> IO (String, [Double])) -> IO [(String, [Double])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
colNames ((Text -> IO (String, [Double])) -> IO [(String, [Double])])
-> (Text -> IO (String, [Double])) -> IO [(String, [Double])]
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col DataFrame
df
(String, [Double]) -> IO (String, [Double])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> String
T.unpack Text
col, [Double]
values)
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, [Double])] -> Plot -> String
boxPlot (PlotConfig -> String
plotTitle PlotConfig
config) [(String, [Double])]
boxData (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotStackedBars :: HasCallStack => T.Text -> [T.Text] -> DataFrame -> IO ()
plotStackedBars :: HasCallStack => Text -> [Text] -> DataFrame -> IO ()
plotStackedBars Text
categoryCol [Text]
valueColumns DataFrame
df =
HasCallStack => Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
plotStackedBarsWith Text
categoryCol [Text]
valueColumns (PlotType -> PlotConfig
defaultPlotConfig PlotType
StackedBar') DataFrame
df
plotStackedBarsWith :: HasCallStack => T.Text -> [T.Text] -> PlotConfig -> DataFrame -> IO ()
plotStackedBarsWith :: HasCallStack => Text -> [Text] -> PlotConfig -> DataFrame -> IO ()
plotStackedBarsWith Text
categoryCol [Text]
valueColumns PlotConfig
config DataFrame
df = do
let categories :: [String]
categories = HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
categoryCol DataFrame
df
uniqueCategories :: [String]
uniqueCategories = [String] -> [String]
forall a. Eq a => [a] -> [a]
L.nub [String]
categories
[(String, [(String, Double)])]
stackData <- [String]
-> (String -> IO (String, [(String, Double)]))
-> IO [(String, [(String, Double)])]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [String]
uniqueCategories ((String -> IO (String, [(String, Double)]))
-> IO [(String, [(String, Double)])])
-> (String -> IO (String, [(String, Double)]))
-> IO [(String, [(String, Double)])]
forall a b. (a -> b) -> a -> b
$ \String
cat -> do
let indices :: [Int]
indices = [Int
i | (Int
i, String
c) <- [Int] -> [String] -> [(Int, String)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] [String]
categories, String
c String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
cat]
[(String, Double)]
seriesData <- [Text] -> (Text -> IO (String, Double)) -> IO [(String, Double)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Text]
valueColumns ((Text -> IO (String, Double)) -> IO [(String, Double)])
-> (Text -> IO (String, Double)) -> IO [(String, Double)]
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let allValues :: [Double]
allValues = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col DataFrame
df
values :: [Double]
values = [[Double]
allValues [Double] -> Int -> Double
forall a. HasCallStack => [a] -> Int -> a
!! Int
i | Int
i <- [Int]
indices, Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
allValues]
(String, Double) -> IO (String, Double)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> String
T.unpack Text
col, [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
values)
(String, [(String, Double)]) -> IO (String, [(String, Double)])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (String
cat, [(String, Double)]
seriesData)
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, [(String, Double)])] -> Plot -> String
stackedBars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, [(String, Double)])]
stackData (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotHeatmap :: HasCallStack => DataFrame -> IO ()
plotHeatmap :: HasCallStack => DataFrame -> IO ()
plotHeatmap DataFrame
df = HasCallStack => PlotConfig -> DataFrame -> IO ()
PlotConfig -> DataFrame -> IO ()
plotHeatmapWith (PlotType -> PlotConfig
defaultPlotConfig PlotType
Heatmap') DataFrame
df
plotHeatmapWith :: HasCallStack => PlotConfig -> DataFrame -> IO ()
plotHeatmapWith :: HasCallStack => PlotConfig -> DataFrame -> IO ()
plotHeatmapWith PlotConfig
config DataFrame
df = do
let numericCols :: [Text]
numericCols = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (DataFrame -> Text -> Bool
isNumericColumn DataFrame
df) (DataFrame -> [Text]
columnNames DataFrame
df)
matrix :: [[Double]]
matrix = (Text -> [Double]) -> [Text] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
col -> HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col DataFrame
df) [Text]
numericCols
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [[Double]] -> Plot -> String
heatmap (PlotConfig -> String
plotTitle PlotConfig
config) [[Double]]
matrix (PlotConfig -> Plot
plotSettings PlotConfig
config)
isNumericColumn :: DataFrame -> T.Text -> Bool
isNumericColumn :: DataFrame -> Text -> Bool
isNumericColumn DataFrame
df Text
colName =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> Bool
False
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn (Vector a
vec :: V.Vector a) ->
case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
Just a :~: Double
_ -> Bool
True
Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
Just a :~: Int
_ -> Bool
True
Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
Just a :~: Float
_ -> Bool
True
Maybe (a :~: Float)
Nothing -> Bool
False
UnboxedColumn (Vector a
vec :: VU.Vector a) ->
case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
Just a :~: Double
_ -> Bool
True
Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
Just a :~: Int
_ -> Bool
True
Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
Just a :~: Float
_ -> Bool
True
Maybe (a :~: Float)
Nothing -> Bool
False
Column
_ -> Bool
False
plotAllHistograms :: HasCallStack => DataFrame -> IO ()
plotAllHistograms :: HasCallStack => DataFrame -> IO ()
plotAllHistograms DataFrame
df = do
let numericCols :: [Text]
numericCols = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (DataFrame -> Text -> Bool
isNumericColumn DataFrame
df) (DataFrame -> [Text]
columnNames DataFrame
df)
[Text] -> (Text -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Text]
numericCols ((Text -> IO ()) -> IO ()) -> (Text -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
plotHistogram Text
col DataFrame
df
plotCorrelationMatrix :: HasCallStack => DataFrame -> IO ()
plotCorrelationMatrix :: HasCallStack => DataFrame -> IO ()
plotCorrelationMatrix DataFrame
df = do
let numericCols :: [Text]
numericCols = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (DataFrame -> Text -> Bool
isNumericColumn DataFrame
df) (DataFrame -> [Text]
columnNames DataFrame
df)
let correlations :: [[Double]]
correlations = (Text -> [Double]) -> [Text] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
col1 ->
(Text -> Double) -> [Text] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
col2 -> let
vals1 :: [Double]
vals1 = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col1 DataFrame
df
vals2 :: [Double]
vals2 = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
col2 DataFrame
df
in [Double] -> [Double] -> Double
forall {a}. Floating a => [a] -> [a] -> a
correlation [Double]
vals1 [Double]
vals2) [Text]
numericCols) [Text]
numericCols
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [[Double]] -> Plot -> String
heatmap String
"Correlation Matrix" [[Double]]
correlations Plot
defPlot
where
correlation :: [a] -> [a] -> a
correlation [a]
xs [a]
ys =
let n :: a
n = Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> a) -> Int -> a
forall a b. (a -> b) -> a -> b
$ [a] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs
meanX :: a
meanX = [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [a]
xs a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
meanY :: a
meanY = [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [a]
ys a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
covXY :: a
covXY = [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
meanX) a -> a -> a
forall a. Num a => a -> a -> a
* (a
y a -> a -> a
forall a. Num a => a -> a -> a
- a
meanY) | (a
x,a
y) <- [a] -> [a] -> [(a, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [a]
xs [a]
ys] a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
stdX :: a
stdX = a -> a
forall a. Floating a => a -> a
sqrt (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
meanX)a -> Integer -> a
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
2 | a
x <- [a]
xs] a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
stdY :: a
stdY = a -> a
forall a. Floating a => a -> a
sqrt (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ [a] -> a
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [(a
y a -> a -> a
forall a. Num a => a -> a -> a
- a
meanY)a -> Integer -> a
forall a b. (Num a, Integral b) => a -> b -> a
^Integer
2 | a
y <- [a]
ys] a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
n
in a
covXY a -> a -> a
forall a. Fractional a => a -> a -> a
/ (a
stdX a -> a -> a
forall a. Num a => a -> a -> a
* a
stdY)
quickPlot :: HasCallStack => [T.Text] -> DataFrame -> IO ()
quickPlot :: HasCallStack => [Text] -> DataFrame -> IO ()
quickPlot [] DataFrame
df = HasCallStack => DataFrame -> IO ()
DataFrame -> IO ()
plotAllHistograms DataFrame
df IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> IO ()
putStrLn String
"Plotted all numeric columns"
quickPlot [Text
col] DataFrame
df = HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
plotHistogram Text
col DataFrame
df
quickPlot [Text
col1, Text
col2] DataFrame
df = HasCallStack => Text -> Text -> DataFrame -> IO ()
Text -> Text -> DataFrame -> IO ()
plotScatter Text
col1 Text
col2 DataFrame
df
quickPlot [Text]
cols DataFrame
df = HasCallStack => [Text] -> DataFrame -> IO ()
[Text] -> DataFrame -> IO ()
plotLines [Text]
cols DataFrame
df
plotBars :: HasCallStack => T.Text -> DataFrame -> IO ()
plotBars :: HasCallStack => Text -> DataFrame -> IO ()
plotBars Text
colName DataFrame
df = HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotBarsWith Text
colName Maybe Text
forall a. Maybe a
Nothing (PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar') DataFrame
df
plotBarsWith :: HasCallStack => T.Text -> Maybe T.Text -> PlotConfig -> DataFrame -> IO ()
plotBarsWith :: HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotBarsWith Text
colName Maybe Text
groupByCol PlotConfig
config DataFrame
df =
case Maybe Text
groupByCol of
Maybe Text
Nothing -> HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotSingleBars Text
colName PlotConfig
config DataFrame
df
Just Text
grpCol -> HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWith Text
grpCol Text
colName PlotConfig
config DataFrame
df
plotSingleBars :: HasCallStack => T.Text -> PlotConfig -> DataFrame -> IO ()
plotSingleBars :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotSingleBars Text
colName PlotConfig
config DataFrame
df = do
let barData :: Maybe [(String, Double)]
barData = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
barData of
Just [(String, Double)]
counts -> do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
10 [(String, Double)]
counts
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(String, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
if [Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
20
then do
let labels :: [String]
labels = (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> String
"Item " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i) [Int
1..[Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
paired :: [(String, Double)]
paired = [String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
labels [Double]
values
grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
10 [(String, Double)]
paired
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let labels :: [String]
labels = (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> String
"Item " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i) [Int
1..[Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) ([String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
labels [Double]
values) (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotBarsTopN :: HasCallStack => Int -> T.Text -> DataFrame -> IO ()
plotBarsTopN :: HasCallStack => Int -> Text -> DataFrame -> IO ()
plotBarsTopN Int
n Text
colName DataFrame
df = HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotBarsTopNWith Int
n Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar') DataFrame
df
plotBarsTopNWith :: HasCallStack => Int -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotBarsTopNWith :: HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotBarsTopNWith Int
n Text
colName PlotConfig
config DataFrame
df = do
let barData :: Maybe [(String, Double)]
barData = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
barData of
Just [(String, Double)]
counts -> do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
n [(String, Double)]
counts
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(String, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
labels :: [String]
labels = (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> String
"Item " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i) [Int
1..[Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
paired :: [(String, Double)]
paired = [String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
labels [Double]
values
grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
n [(String, Double)]
paired
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotGroupedBarsWith :: HasCallStack => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWith :: HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotGroupedBarsWith Text
groupCol Text
valCol PlotConfig
config DataFrame
df = do
let isNumeric :: Bool
isNumeric = Text -> DataFrame -> Bool
isNumericColumnCheck Text
valCol DataFrame
df
if Bool
isNumeric
then do
let groups :: [String]
groups = HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
groupCol DataFrame
df
values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
valCol DataFrame
df
grouped :: [(String, Double)]
grouped = Map String Double -> [(String, Double)]
forall k a. Map k a -> [(k, a)]
M.toList (Map String Double -> [(String, Double)])
-> Map String Double -> [(String, Double)]
forall a b. (a -> b) -> a -> b
$ (Double -> Double -> Double)
-> [(String, Double)] -> Map String Double
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) ([String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
groups [Double]
values)
finalGroups :: [(String, Double)]
finalGroups = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
10 [(String, Double)]
grouped
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
finalGroups (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let groups :: [String]
groups = HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
groupCol DataFrame
df
vals :: [String]
vals = HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
valCol DataFrame
df
pairs :: [(String, String)]
pairs = [String] -> [String] -> [(String, String)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
groups [String]
vals
counts :: [(String, Integer)]
counts = Map String Integer -> [(String, Integer)]
forall k a. Map k a -> [(k, a)]
M.toList (Map String Integer -> [(String, Integer)])
-> Map String Integer -> [(String, Integer)]
forall a b. (a -> b) -> a -> b
$ (Integer -> Integer -> Integer)
-> [(String, Integer)] -> Map String Integer
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
(+)
[(String
g String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" - " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
v, Integer
1) | (String
g, String
v) <- [(String, String)]
pairs]
finalCounts :: [(String, Double)]
finalCounts = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
15 [(String
k, Integer -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
v) | (String
k, Integer
v) <- [(String, Integer)]
counts]
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
finalCounts (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotValueCounts :: HasCallStack => T.Text -> DataFrame -> IO ()
plotValueCounts :: HasCallStack => Text -> DataFrame -> IO ()
plotValueCounts Text
colName DataFrame
df = HasCallStack => Text -> Int -> PlotConfig -> DataFrame -> IO ()
Text -> Int -> PlotConfig -> DataFrame -> IO ()
plotValueCountsWith Text
colName Int
10 (PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar') DataFrame
df
plotValueCountsWith :: HasCallStack => T.Text -> Int -> PlotConfig -> DataFrame -> IO ()
plotValueCountsWith :: HasCallStack => Text -> Int -> PlotConfig -> DataFrame -> IO ()
plotValueCountsWith Text
colName Int
maxBars PlotConfig
config DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
maxBars [(String, Double)]
c
config' :: PlotConfig
config' = PlotConfig
config { plotTitle = if null (plotTitle config)
then "Value counts for " ++ T.unpack colName
else plotTitle config }
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (Text -> String
T.unpack Text
colName) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config')
Maybe [(String, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Could not get value counts for column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName
plotBarsWithPercentages :: HasCallStack => T.Text -> DataFrame -> IO ()
plotBarsWithPercentages :: HasCallStack => Text -> DataFrame -> IO ()
plotBarsWithPercentages Text
colName DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
c)
percentages :: [(String, Double)]
percentages = [(String
label String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"%)", Double
val)
| (String
label, Double
val) <- [(String, Double)]
c]
grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
10 [(String, Double)]
percentages
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (String
"Distribution of " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName) [(String, Double)]
grouped Plot
defPlot
Maybe [(String, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Could not get value counts for column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName
smartPlotBars :: HasCallStack => T.Text -> DataFrame -> IO ()
smartPlotBars :: HasCallStack => Text -> DataFrame -> IO ()
smartPlotBars Text
colName DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> do
let numUnique :: Int
numUnique = [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c
config :: PlotConfig
config = (PlotType -> PlotConfig
defaultPlotConfig PlotType
Bar') {
plotTitle = T.unpack colName ++ " (" ++ show numUnique ++ " unique values)"
}
if Int
numUnique Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
12
then String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
c (PlotConfig -> Plot
plotSettings PlotConfig
config)
else if Int
numUnique Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
20
then do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
12 [(String, Double)]
c
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" - Top 12 + Other") [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
10 [(String, Double)]
c
otherCount :: Int
otherCount = Int
numUnique Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
10
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
bars (PlotConfig -> String
plotTitle PlotConfig
config String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" - Top 10 + Other (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
otherCount String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" items)")
[(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(String, Double)]
Nothing -> HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
plotBars Text
colName DataFrame
df
plotCategoricalSummary :: HasCallStack => DataFrame -> IO ()
plotCategoricalSummary :: HasCallStack => DataFrame -> IO ()
plotCategoricalSummary DataFrame
df = do
let cols :: [Text]
cols = DataFrame -> [Text]
columnNames DataFrame
df
[Text] -> (Text -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Text]
cols ((Text -> IO ()) -> IO ()) -> (Text -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
col DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
let numUnique :: Int
numUnique = [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"\n=== " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
col String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
numUnique String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" unique values) ==="
if Int
numUnique Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
15 then HasCallStack => Int -> Text -> DataFrame -> IO ()
Int -> Text -> DataFrame -> IO ()
plotBarsTopN Int
10 Text
col DataFrame
df else HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
plotBars Text
col DataFrame
df
Maybe [(String, Double)]
Nothing -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
getCategoricalCounts :: HasCallStack => T.Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts :: HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> String -> Maybe [(String, Double)]
forall a. HasCallStack => String -> a
error (String -> Maybe [(String, Double)])
-> String -> Maybe [(String, Double)]
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" not found"
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn Vector a
vec -> let counts :: [(a, Int)]
counts = Vector a -> [(a, Int)]
forall a. (Ord a, Show a) => Vector a -> [(a, Int)]
countValues Vector a
vec
in [(String, Double)] -> Maybe [(String, Double)]
forall a. a -> Maybe a
Just [(a -> String
forall a. Show a => a -> String
show a
k, Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
v) | (a
k, Int
v) <- [(a, Int)]
counts]
UnboxedColumn Vector a
vec -> let counts :: [(a, Int)]
counts = Vector a -> [(a, Int)]
forall a. (Ord a, Show a, Unbox a) => Vector a -> [(a, Int)]
countValuesUnboxed Vector a
vec
in [(String, Double)] -> Maybe [(String, Double)]
forall a. a -> Maybe a
Just [(a -> String
forall a. Show a => a -> String
show a
k, Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
v) | (a
k, Int
v) <- [(a, Int)]
counts]
where
countValues :: (Ord a, Show a) => V.Vector a -> [(a, Int)]
countValues :: forall a. (Ord a, Show a) => Vector a -> [(a, Int)]
countValues Vector a
vec = Map a Int -> [(a, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map a Int -> [(a, Int)]) -> Map a Int -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ (a -> Map a Int -> Map a Int) -> Map a Int -> Vector a -> Map a Int
forall a b. (a -> b -> b) -> b -> Vector a -> b
V.foldr' (\a
x Map a Int
acc -> (Int -> Int -> Int) -> a -> Int -> Map a Int -> Map a Int
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) a
x Int
1 Map a Int
acc) Map a Int
forall k a. Map k a
M.empty Vector a
vec
countValuesUnboxed :: (Ord a, Show a, VU.Unbox a) => VU.Vector a -> [(a, Int)]
countValuesUnboxed :: forall a. (Ord a, Show a, Unbox a) => Vector a -> [(a, Int)]
countValuesUnboxed Vector a
vec = Map a Int -> [(a, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map a Int -> [(a, Int)]) -> Map a Int -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ (a -> Map a Int -> Map a Int) -> Map a Int -> Vector a -> Map a Int
forall a b. Unbox a => (a -> b -> b) -> b -> Vector a -> b
VU.foldr' (\a
x Map a Int
acc -> (Int -> Int -> Int) -> a -> Int -> Map a Int -> Map a Int
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) a
x Int
1 Map a Int
acc) Map a Int
forall k a. Map k a
M.empty Vector a
vec
isNumericColumnCheck :: T.Text -> DataFrame -> Bool
isNumericColumnCheck :: Text -> DataFrame -> Bool
isNumericColumnCheck Text
colName DataFrame
df =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> Bool
False
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn (Vector a
vec :: V.Vector a) -> forall a. Typeable a => Bool
isNumericType @a
UnboxedColumn (Vector a
vec :: VU.Vector a) -> forall a. Typeable a => Bool
isNumericType @a
isNumericType :: forall a. Typeable a => Bool
isNumericType :: forall a. Typeable a => Bool
isNumericType =
case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
Just a :~: Double
_ -> Bool
True
Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
Just a :~: Int
_ -> Bool
True
Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
Just a :~: Float
_ -> Bool
True
Maybe (a :~: Float)
Nothing -> case TypeRep a -> TypeRep Integer -> Maybe (a :~: Integer)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Integer) of
Just a :~: Integer
_ -> Bool
True
Maybe (a :~: Integer)
Nothing -> Bool
False
extractStringColumn :: HasCallStack => T.Text -> DataFrame -> [String]
extractStringColumn :: HasCallStack => Text -> DataFrame -> [String]
extractStringColumn Text
colName DataFrame
df =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> String -> [String]
forall a. HasCallStack => String -> a
error (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" not found"
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn Vector a
vec -> Vector String -> [String]
forall a. Vector a -> [a]
V.toList (Vector String -> [String]) -> Vector String -> [String]
forall a b. (a -> b) -> a -> b
$ (a -> String) -> Vector a -> Vector String
forall a b. (a -> b) -> Vector a -> Vector b
V.map a -> String
forall a. Show a => a -> String
show Vector a
vec
UnboxedColumn Vector a
vec -> Vector String -> [String]
forall a. Vector a -> [a]
V.toList (Vector String -> [String]) -> Vector String -> [String]
forall a b. (a -> b) -> a -> b
$ (a -> String) -> Vector a -> Vector String
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
VG.map a -> String
forall a. Show a => a -> String
show (Vector a -> Vector a
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VG.convert Vector a
vec)
extractNumericColumn :: HasCallStack => T.Text -> DataFrame -> [Double]
extractNumericColumn :: HasCallStack => Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df =
case Text -> Map Text Int -> Maybe Int
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
colName (DataFrame -> Map Text Int
columnIndices DataFrame
df) of
Maybe Int
Nothing -> String -> [Double]
forall a. HasCallStack => String -> a
error (String -> [Double]) -> String -> [Double]
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" not found"
Just Int
idx ->
let col :: Column
col = DataFrame -> Vector Column
columns DataFrame
df Vector Column -> Int -> Column
forall a. Vector a -> Int -> a
V.! Int
idx
in case Column
col of
BoxedColumn Vector a
vec -> Vector a -> [Double]
forall a. (Typeable a, Show a) => Vector a -> [Double]
vectorToDoubles Vector a
vec
UnboxedColumn Vector a
vec -> Vector a -> [Double]
forall a. (Typeable a, Unbox a, Show a) => Vector a -> [Double]
unboxedVectorToDoubles Vector a
vec
vectorToDoubles :: forall a. (Typeable a, Show a) => V.Vector a -> [Double]
vectorToDoubles :: forall a. (Typeable a, Show a) => Vector a -> [Double]
vectorToDoubles Vector a
vec =
case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
Just a :~: Double
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList Vector a
Vector Double
vec
Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
Just a :~: Int
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Vector a
vec
Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Integer -> Maybe (a :~: Integer)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Integer) of
Just a :~: Integer
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Vector a
vec
Maybe (a :~: Integer)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
Just a :~: Float
Refl -> Vector Double -> [Double]
forall a. Vector a -> [a]
V.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map a -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac Vector a
vec
Maybe (a :~: Float)
Nothing -> String -> [Double]
forall a. HasCallStack => String -> a
error (String -> [Double]) -> String -> [Double]
forall a b. (a -> b) -> a -> b
$ String
"Column is not numeric (type: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"
unboxedVectorToDoubles :: forall a. (Typeable a, VU.Unbox a, Show a) => VU.Vector a -> [Double]
unboxedVectorToDoubles :: forall a. (Typeable a, Unbox a, Show a) => Vector a -> [Double]
unboxedVectorToDoubles Vector a
vec =
case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
Just a :~: Double
Refl -> Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector a
Vector Double
vec
Maybe (a :~: Double)
Nothing -> case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
Just a :~: Int
Refl -> Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
VU.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Vector a
vec
Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Float -> Maybe (a :~: Float)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Float) of
Just a :~: Float
Refl -> Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
VU.toList (Vector Double -> [Double]) -> Vector Double -> [Double]
forall a b. (a -> b) -> a -> b
$ (a -> Double) -> Vector a -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map a -> Double
forall a b. (Real a, Fractional b) => a -> b
realToFrac Vector a
vec
Maybe (a :~: Float)
Nothing -> String -> [Double]
forall a. HasCallStack => String -> a
error (String -> [Double]) -> String -> [Double]
forall a b. (a -> b) -> a -> b
$ String
"Column is not numeric (type: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ TypeRep a -> String
forall a. Show a => a -> String
show (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"
groupWithOther :: Int -> [(String, Double)] -> [(String, Double)]
groupWithOther :: Int -> [(String, Double)] -> [(String, Double)]
groupWithOther Int
n [(String, Double)]
items =
let sorted :: [(String, Double)]
sorted = ((String, Double) -> Double)
-> [(String, Double)] -> [(String, Double)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
L.sortOn (Double -> Double
forall a. Num a => a -> a
negate (Double -> Double)
-> ((String, Double) -> Double) -> (String, Double) -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String, Double) -> Double
forall a b. (a, b) -> b
snd) [(String, Double)]
items
([(String, Double)]
topN, [(String, Double)]
rest) = Int
-> [(String, Double)] -> ([(String, Double)], [(String, Double)])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [(String, Double)]
sorted
otherSum :: Double
otherSum = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
rest)
result :: [(String, Double)]
result = if [(String, Double)] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(String, Double)]
rest Bool -> Bool -> Bool
|| Double
otherSum Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
0
then [(String, Double)]
topN
else [(String, Double)]
topN [(String, Double)] -> [(String, Double)] -> [(String, Double)]
forall a. [a] -> [a] -> [a]
++ [(String
"Other (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show ([(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
rest) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" items)", Double
otherSum)]
in [(String, Double)]
result
plotPie :: HasCallStack => T.Text -> Maybe T.Text -> DataFrame -> IO ()
plotPie :: HasCallStack => Text -> Maybe Text -> DataFrame -> IO ()
plotPie Text
valCol Maybe Text
labelCol DataFrame
df = HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotPieWith Text
valCol Maybe Text
labelCol (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie') DataFrame
df
plotPieWith :: HasCallStack => T.Text -> Maybe T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieWith :: HasCallStack =>
Text -> Maybe Text -> PlotConfig -> DataFrame -> IO ()
plotPieWith Text
valCol Maybe Text
labelCol PlotConfig
config DataFrame
df = do
let categoricalData :: Maybe [(String, Double)]
categoricalData = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
valCol DataFrame
df
case Maybe [(String, Double)]
categoricalData of
Just [(String, Double)]
counts -> do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
8 [(String, Double)]
counts
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(String, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
valCol DataFrame
df
labels :: [String]
labels = case Maybe Text
labelCol of
Maybe Text
Nothing -> (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> String
"Item " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i) [Int
1..[Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
Just Text
lCol -> HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
lCol DataFrame
df
let pieData :: [(String, Double)]
pieData = [String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
labels [Double]
values
grouped :: [(String, Double)]
grouped = if [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
pieData Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10
then Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
8 [(String, Double)]
pieData
else [(String, Double)]
pieData
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
groupWithOtherForPie :: Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie :: Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
n [(String, Double)]
items =
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
items)
sorted :: [(String, Double)]
sorted = ((String, Double) -> Double)
-> [(String, Double)] -> [(String, Double)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
L.sortOn (Double -> Double
forall a. Num a => a -> a
negate (Double -> Double)
-> ((String, Double) -> Double) -> (String, Double) -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String, Double) -> Double
forall a b. (a, b) -> b
snd) [(String, Double)]
items
([(String, Double)]
topN, [(String, Double)]
rest) = Int
-> [(String, Double)] -> ([(String, Double)], [(String, Double)])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n [(String, Double)]
sorted
otherSum :: Double
otherSum = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
rest)
otherPct :: Int
otherPct = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
otherSum Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int
result :: [(String, Double)]
result = if [(String, Double)] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(String, Double)]
rest Bool -> Bool -> Bool
|| Double
otherSum Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
0
then [(String, Double)]
topN
else [(String, Double)]
topN [(String, Double)] -> [(String, Double)] -> [(String, Double)]
forall a. [a] -> [a] -> [a]
++ [(String
"Other (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show ([(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
rest) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" items, " String -> ShowS
forall a. [a] -> [a] -> [a]
++
Int -> String
forall a. Show a => a -> String
show Int
otherPct String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"%)", Double
otherSum)]
in [(String, Double)]
result
plotPieWithPercentages :: HasCallStack => T.Text -> DataFrame -> IO ()
plotPieWithPercentages :: HasCallStack => Text -> DataFrame -> IO ()
plotPieWithPercentages Text
colName DataFrame
df = HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotPieWithPercentagesConfig Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie') DataFrame
df
plotPieWithPercentagesConfig :: HasCallStack => T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieWithPercentagesConfig :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotPieWithPercentagesConfig Text
colName PlotConfig
config DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
c)
withPct :: [(String, Double)]
withPct = [(String
label String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"%)", Double
val)
| (String
label, Double
val) <- [(String, Double)]
c]
grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
8 [(String, Double)]
withPct
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(String, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
values
labels :: [String]
labels = (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> String
"Item " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i) [Int
1..[Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
withPct :: [(String, Double)]
withPct = [(String
label String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"%)", Double
val)
| (String
label, Double
val) <- [String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
labels [Double]
values]
grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
8 [(String, Double)]
withPct
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotPieTopN :: HasCallStack => Int -> T.Text -> DataFrame -> IO ()
plotPieTopN :: HasCallStack => Int -> Text -> DataFrame -> IO ()
plotPieTopN Int
n Text
colName DataFrame
df = HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieTopNWith Int
n Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie') DataFrame
df
plotPieTopNWith :: HasCallStack => Int -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieTopNWith :: HasCallStack => Int -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieTopNWith Int
n Text
colName PlotConfig
config DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
n [(String, Double)]
c
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(String, Double)]
Nothing -> do
let values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
colName DataFrame
df
labels :: [String]
labels = (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> String
"Item " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i) [Int
1..[Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
values]
paired :: [(String, Double)]
paired = [String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
labels [Double]
values
grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
n [(String, Double)]
paired
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
smartPlotPie :: HasCallStack => T.Text -> DataFrame -> IO ()
smartPlotPie :: HasCallStack => Text -> DataFrame -> IO ()
smartPlotPie Text
colName DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> do
let numUnique :: Int
numUnique = [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c
total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
c)
significant :: [(String, Double)]
significant = ((String, Double) -> Bool)
-> [(String, Double)] -> [(String, Double)]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(String
_, Double
v) -> Double
v Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
0.01) [(String, Double)]
c
config :: PlotConfig
config = (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie') {
plotTitle = T.unpack colName ++ " Distribution"
}
if [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
significant Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
6
then String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
significant (PlotConfig -> Plot
plotSettings PlotConfig
config)
else if [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
significant Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
10
then do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
8 [(String, Double)]
c
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let grouped :: [(String, Double)]
grouped = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
6 [(String, Double)]
c
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
grouped (PlotConfig -> Plot
plotSettings PlotConfig
config)
Maybe [(String, Double)]
Nothing -> HasCallStack => Text -> Maybe Text -> DataFrame -> IO ()
Text -> Maybe Text -> DataFrame -> IO ()
plotPie Text
colName Maybe Text
forall a. Maybe a
Nothing DataFrame
df
plotPieGrouped :: HasCallStack => T.Text -> T.Text -> DataFrame -> IO ()
plotPieGrouped :: HasCallStack => Text -> Text -> DataFrame -> IO ()
plotPieGrouped Text
groupCol Text
valCol DataFrame
df = HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieGroupedWith Text
groupCol Text
valCol (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie') DataFrame
df
plotPieGroupedWith :: HasCallStack => T.Text -> T.Text -> PlotConfig -> DataFrame -> IO ()
plotPieGroupedWith :: HasCallStack => Text -> Text -> PlotConfig -> DataFrame -> IO ()
plotPieGroupedWith Text
groupCol Text
valCol PlotConfig
config DataFrame
df = do
let isNumeric :: Bool
isNumeric = Text -> DataFrame -> Bool
isNumericColumnCheck Text
valCol DataFrame
df
if Bool
isNumeric
then do
let groups :: [String]
groups = HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
groupCol DataFrame
df
values :: [Double]
values = HasCallStack => Text -> DataFrame -> [Double]
Text -> DataFrame -> [Double]
extractNumericColumn Text
valCol DataFrame
df
grouped :: [(String, Double)]
grouped = Map String Double -> [(String, Double)]
forall k a. Map k a -> [(k, a)]
M.toList (Map String Double -> [(String, Double)])
-> Map String Double -> [(String, Double)]
forall a b. (a -> b) -> a -> b
$ (Double -> Double -> Double)
-> [(String, Double)] -> Map String Double
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) ([String] -> [Double] -> [(String, Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [String]
groups [Double]
values)
finalGroups :: [(String, Double)]
finalGroups = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
8 [(String, Double)]
grouped
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
finalGroups (PlotConfig -> Plot
plotSettings PlotConfig
config)
else do
let groups :: [String]
groups = HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
groupCol DataFrame
df
vals :: [String]
vals = HasCallStack => Text -> DataFrame -> [String]
Text -> DataFrame -> [String]
extractStringColumn Text
valCol DataFrame
df
combined :: [String]
combined = (String -> ShowS) -> [String] -> [String] -> [String]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\String
g String
v -> String
g String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" - " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
v) [String]
groups [String]
vals
counts :: [(String, Integer)]
counts = Map String Integer -> [(String, Integer)]
forall k a. Map k a -> [(k, a)]
M.toList (Map String Integer -> [(String, Integer)])
-> Map String Integer -> [(String, Integer)]
forall a b. (a -> b) -> a -> b
$ (Integer -> Integer -> Integer)
-> [(String, Integer)] -> Map String Integer
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
(+) [(String
c, Integer
1) | String
c <- [String]
combined]
finalCounts :: [(String, Double)]
finalCounts = Int -> [(String, Double)] -> [(String, Double)]
groupWithOtherForPie Int
10 [(String
k, Integer -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
v) | (String
k, Integer
v) <- [(String, Integer)]
counts]
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config) [(String, Double)]
finalCounts (PlotConfig -> Plot
plotSettings PlotConfig
config)
plotPieComparison :: HasCallStack => [T.Text] -> DataFrame -> IO ()
plotPieComparison :: HasCallStack => [Text] -> DataFrame -> IO ()
plotPieComparison [Text]
cols DataFrame
df = do
[Text] -> (Text -> IO ()) -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Text]
cols ((Text -> IO ()) -> IO ()) -> (Text -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Text
col -> do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
col DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ([(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
1 Bool -> Bool -> Bool
&& [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
20) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"\n=== " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
col String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" Distribution ==="
HasCallStack => Text -> DataFrame -> IO ()
Text -> DataFrame -> IO ()
smartPlotPie Text
col DataFrame
df
Maybe [(String, Double)]
Nothing -> () -> IO ()
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
plotBinaryPie :: HasCallStack => T.Text -> DataFrame -> IO ()
plotBinaryPie :: HasCallStack => Text -> DataFrame -> IO ()
plotBinaryPie Text
colName DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c ->
if [(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2
then do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
c)
withPct :: [(String, Double)]
withPct = [(String
label String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"%)", Double
val)
| (String
label, Double
val) <- [(String, Double)]
c]
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" Proportion") [(String, Double)]
withPct Plot
defPlot
else String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" is not binary (has " String -> ShowS
forall a. [a] -> [a] -> [a]
++
Int -> String
forall a. Show a => a -> String
show ([(String, Double)] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [(String, Double)]
c) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" unique values)"
Maybe [(String, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" is not categorical"
plotMarketShare :: HasCallStack => T.Text -> DataFrame -> IO ()
plotMarketShare :: HasCallStack => Text -> DataFrame -> IO ()
plotMarketShare Text
colName DataFrame
df = HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
Text -> PlotConfig -> DataFrame -> IO ()
plotMarketShareWith Text
colName (PlotType -> PlotConfig
defaultPlotConfig PlotType
Pie') DataFrame
df
plotMarketShareWith :: HasCallStack => T.Text -> PlotConfig -> DataFrame -> IO ()
plotMarketShareWith :: HasCallStack => Text -> PlotConfig -> DataFrame -> IO ()
plotMarketShareWith Text
colName PlotConfig
config DataFrame
df = do
let counts :: Maybe [(String, Double)]
counts = HasCallStack => Text -> DataFrame -> Maybe [(String, Double)]
Text -> DataFrame -> Maybe [(String, Double)]
getCategoricalCounts Text
colName DataFrame
df
case Maybe [(String, Double)]
counts of
Just [(String, Double)]
c -> do
let total :: Double
total = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
c)
sorted :: [(String, Double)]
sorted = ((String, Double) -> Double)
-> [(String, Double)] -> [(String, Double)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
L.sortOn (Double -> Double
forall a. Num a => a -> a
negate (Double -> Double)
-> ((String, Double) -> Double) -> (String, Double) -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String, Double) -> Double
forall a b. (a, b) -> b
snd) [(String, Double)]
c
significantShares :: [(String, Double)]
significantShares = ((String, Double) -> Bool)
-> [(String, Double)] -> [(String, Double)]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (\(String
_, Double
v) -> Double
v Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
0.02) [(String, Double)]
sorted
otherSum :: Double
otherSum = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double
v | (String
_, Double
v) <- [(String, Double)]
c, Double
v Double -> [Double] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ((String, Double) -> Double) -> [(String, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> Double
forall a b. (a, b) -> b
snd [(String, Double)]
significantShares]
formatShare :: (String, Double) -> (String, Double)
formatShare (String
label, Double
val) =
let pct :: Int
pct = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (Double
100 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
val Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total) :: Int
in (String
label String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" (" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
pct String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"%)", Double
val)
shares :: [(String, Double)]
shares = ((String, Double) -> (String, Double))
-> [(String, Double)] -> [(String, Double)]
forall a b. (a -> b) -> [a] -> [b]
map (String, Double) -> (String, Double)
formatShare [(String, Double)]
significantShares
finalShares :: [(String, Double)]
finalShares = if Double
otherSum Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
0 Bool -> Bool -> Bool
&& Double
otherSum Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
total Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
0.01
then [(String, Double)]
shares [(String, Double)] -> [(String, Double)] -> [(String, Double)]
forall a. [a] -> [a] -> [a]
++ [(String
"Others (<2% each)", Double
otherSum)]
else [(String, Double)]
shares
let config' :: PlotConfig
config' = PlotConfig
config { plotTitle = if null (plotTitle config)
then T.unpack colName ++ " Market Share"
else plotTitle config }
String -> IO ()
putStrLn (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String -> [(String, Double)] -> Plot -> String
pie (PlotConfig -> String
plotTitle PlotConfig
config') [(String, Double)]
finalShares (PlotConfig -> Plot
plotSettings PlotConfig
config')
Maybe [(String, Double)]
Nothing -> String -> IO ()
forall a. HasCallStack => String -> a
error (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$ String
"Column " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
colName String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" is not categorical"