dataframe-1.0.0.1: A fast, safe, and intuitive DataFrame library.
Safe Haskell: None
Language: Haskell2010

DataFrame.DecisionTree

Synopsis

Documentation

data Tree a Source #

Constructors

Leaf !a 
Branch !(Expr Bool) !(Tree a) !(Tree a) 

Instances

Instances details
Show a => Show (Tree a) Source # 
Instance details

Defined in DataFrame.DecisionTree

Methods

showsPrec :: Int -> Tree a -> ShowS #

show :: Tree a -> String #

showList :: [Tree a] -> ShowS #

Eq a => Eq (Tree a) Source # 
Instance details

Defined in DataFrame.DecisionTree

Methods

(==) :: Tree a -> Tree a -> Bool #

(/=) :: Tree a -> Tree a -> Bool #

fitDecisionTree :: Columnable a => TreeConfig -> Expr a -> DataFrame -> Expr a Source #

Fit a TAO decision tree

taoOptimize :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a Source #

taoIteration :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a Source #

optimizeDepthLevel :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Int -> Tree a Source #

optimizeAtDepth :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Int -> Int -> Tree a Source #

optimizeNode :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a Source #

data CarePoint Source #

A care point with its index and which direction leads to correct classification

Constructors

CarePoint 

Instances

Instances details
Show CarePoint Source # 
Instance details

Defined in DataFrame.DecisionTree

Eq CarePoint Source # 
Instance details

Defined in DataFrame.DecisionTree

data Direction Source #

Constructors

GoLeft 
GoRight 

Instances

Instances details
Show Direction Source # 
Instance details

Defined in DataFrame.DecisionTree

Eq Direction Source # 
Instance details

Defined in DataFrame.DecisionTree

identifyCarePoints :: Columnable a => Text -> DataFrame -> Vector Int -> Tree a -> Tree a -> [CarePoint] Source #

Identify care points: points where exactly one subtree classifies correctly

For each point reaching the node:

1. Compute what label the left subtree would predict.
2. Compute what label the right subtree would predict.
3. If exactly one matches the true label, it's a care point.
4. Record which direction leads to correct classification.

predictWithTree :: Columnable a => Text -> DataFrame -> Int -> Tree a -> a Source #

Predict the label for a single point using a fixed tree

partitionIndices :: Expr Bool -> DataFrame -> Vector Int -> (Vector Int, Vector Int) Source #

pruneExpr :: (Columnable a, Eq a) => Expr a -> Expr a Source #

data NumExpr Source #

Unifies non-nullable and nullable Double expressions for feature generation.

pruneTree :: (Columnable a, Eq a) => Expr a -> Expr a Source #

type ProbTree a = Tree (Map a Double) Source #

A tree where each leaf stores a class-probability distribution.

probsFromIndices :: Columnable a => Text -> DataFrame -> Vector Int -> Map a Double Source #

Compute normalised class probabilities from a subset of training rows.

buildProbTree :: Columnable a => Tree a -> Text -> DataFrame -> Vector Int -> ProbTree a Source #

Annotate a fitted 'Tree a' with class distributions by routing the training data through it. The split conditions are preserved; only the leaf values change from a majority label to a probability map.

fitProbTree :: Columnable a => TreeConfig -> Expr a -> DataFrame -> Map a (Expr Double) Source #

Fit a TAO decision tree and return one Expr Double per class.

Each (c, e) pair in the result map means: evaluate e on a DataFrame row to get the predicted probability of class c. You can insert these as new columns with derive or evaluate them with interpret.

Example:

  let pes = fitProbTree @T.Text cfg (Col "species") trainDf
      -- pes M.! "setosa" :: Expr Double
      df' = M.foldlWithKey' (\d cls e -> D.derive (cls <> "_prob") e d) testDf pes

probExprs :: Columnable a => ProbTree a -> Map a (Expr Double) Source #

Convert a ProbTree into one 'Expr Double' per class.

Each (c, e) pair means: evaluate e on a DataFrame row to get the predicted probability of class c. You can insert these as new columns with derive or evaluate them with interpret.

Example:

  let pt  = fitProbTree @T.Text cfg (Col "species") trainDf
      pes = probExprs pt
      -- pes M.! "setosa" :: Expr Double
      df' = M.foldlWithKey' (\d cls e -> D.derive (cls <> "_prob") e d) testDf pes

(Note to maintainers: `probExprs` expects a `ProbTree a`, but `fitProbTree` is documented above as returning `Map a (Expr Double)` — the `pt` binding in this example likely should come from `buildProbTree`; please verify against the source.)