| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
DataFrame.DecisionTree
Synopsis
- data TreeConfig = TreeConfig {}
- data SynthConfig = SynthConfig {}
- defaultSynthConfig :: SynthConfig
- defaultTreeConfig :: TreeConfig
- data Tree a
- treeDepth :: Tree a -> Int
- treeToExpr :: Columnable a => Tree a -> Expr a
- fitDecisionTree :: Columnable a => TreeConfig -> Expr a -> DataFrame -> Expr a
- taoOptimize :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a
- taoIteration :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a
- optimizeDepthLevel :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Int -> Tree a
- optimizeAtDepth :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Int -> Int -> Tree a
- optimizeNode :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a
- findBestSplitTAO :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a -> Expr Bool -> Expr Bool
- data CarePoint = CarePoint {
- cpIndex :: !Int,
- cpCorrectDir :: !Direction }
- data Direction
- identifyCarePoints :: Columnable a => Text -> DataFrame -> Vector Int -> Tree a -> Tree a -> [CarePoint]
- predictWithTree :: Columnable a => Text -> DataFrame -> Int -> Tree a -> a
- countCarePointErrors :: Expr Bool -> DataFrame -> [CarePoint] -> Int
- partitionIndices :: Expr Bool -> DataFrame -> Vector Int -> (Vector Int, Vector Int)
- majorityValueFromIndices :: Columnable a => Text -> DataFrame -> Vector Int -> a
- computeTreeLoss :: Columnable a => Text -> DataFrame -> Vector Int -> Tree a -> Double
- pruneDead :: Tree a -> Tree a
- pruneExpr :: (Columnable a, Eq a) => Expr a -> Expr a
- buildGreedyTree :: Columnable a => TreeConfig -> Int -> Text -> [Expr Bool] -> DataFrame -> Tree a
- findBestGreedySplit :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Maybe (Expr Bool)
- data NumExpr
- numExprCols :: NumExpr -> [Text]
- numExprEq :: NumExpr -> NumExpr -> Bool
- combineNumExprs :: NumExpr -> NumExpr -> [NumExpr]
- numericConditions :: TreeConfig -> DataFrame -> [Expr Bool]
- generateNumericConds :: TreeConfig -> DataFrame -> [Expr Bool]
- numericExprsWithTerms :: SynthConfig -> DataFrame -> [NumExpr]
- numericCols :: DataFrame -> [NumExpr]
- numericExprs :: SynthConfig -> DataFrame -> [NumExpr] -> Int -> Int -> [NumExpr]
- boolExprs :: DataFrame -> [Expr Bool] -> [Expr Bool] -> Int -> Int -> [Expr Bool]
- generateConditionsOld :: TreeConfig -> DataFrame -> [Expr Bool]
- partitionDataFrame :: Expr Bool -> DataFrame -> (DataFrame, DataFrame)
- calculateGini :: Columnable a => Text -> DataFrame -> Double
- majorityValue :: Columnable a => Text -> DataFrame -> a
- getCounts :: Columnable a => Text -> DataFrame -> Map a Int
- percentile :: Int -> Expr Double -> DataFrame -> Double
- buildTree :: Columnable a => TreeConfig -> Int -> Text -> [Expr Bool] -> DataFrame -> Expr a
- findBestSplit :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Maybe (Expr Bool)
- pruneTree :: (Columnable a, Eq a) => Expr a -> Expr a
- type ProbTree a = Tree (Map a Double)
- probsFromIndices :: Columnable a => Text -> DataFrame -> Vector Int -> Map a Double
- buildProbTree :: Columnable a => Tree a -> Text -> DataFrame -> Vector Int -> ProbTree a
- fitProbTree :: Columnable a => TreeConfig -> Expr a -> DataFrame -> Map a (Expr Double)
- probExprs :: Columnable a => ProbTree a -> Map a (Expr Double)
Documentation
data TreeConfig Source #
Constructors
| TreeConfig | |
Fields
| |
Instances
| Show TreeConfig Source # | |
Defined in DataFrame.DecisionTree Methods showsPrec :: Int -> TreeConfig -> ShowS # show :: TreeConfig -> String # showList :: [TreeConfig] -> ShowS # | |
| Eq TreeConfig Source # | |
Defined in DataFrame.DecisionTree | |
data SynthConfig Source #
Constructors
| SynthConfig | |
Fields
| |
Instances
| Show SynthConfig Source # | |
Defined in DataFrame.DecisionTree Methods showsPrec :: Int -> SynthConfig -> ShowS # show :: SynthConfig -> String # showList :: [SynthConfig] -> ShowS # | |
| Eq SynthConfig Source # | |
Defined in DataFrame.DecisionTree | |
treeToExpr :: Columnable a => Tree a -> Expr a Source #
fitDecisionTree :: Columnable a => TreeConfig -> Expr a -> DataFrame -> Expr a Source #
Fit a TAO decision tree
taoOptimize :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a Source #
taoIteration :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a Source #
optimizeDepthLevel :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Int -> Tree a Source #
optimizeAtDepth :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Int -> Int -> Tree a Source #
optimizeNode :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a Source #
findBestSplitTAO :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Vector Int -> Tree a -> Tree a -> Expr Bool -> Expr Bool Source #
data CarePoint Source #
A care point with its index and which direction leads to correct classification
Constructors
| CarePoint | |
Fields
| |
identifyCarePoints :: Columnable a => Text -> DataFrame -> Vector Int -> Tree a -> Tree a -> [CarePoint] Source #
Identify care points: points where exactly one subtree classifies correctly
For each point reaching the node:
1. Compute what label the left subtree would predict
2. Compute what label the right subtree would predict
3. If exactly one matches the true label, it's a care point
4. Record which direction leads to correct classification
predictWithTree :: Columnable a => Text -> DataFrame -> Int -> Tree a -> a Source #
Predict the label for a single point using a fixed tree
majorityValueFromIndices :: Columnable a => Text -> DataFrame -> Vector Int -> a Source #
computeTreeLoss :: Columnable a => Text -> DataFrame -> Vector Int -> Tree a -> Double Source #
buildGreedyTree :: Columnable a => TreeConfig -> Int -> Text -> [Expr Bool] -> DataFrame -> Tree a Source #
findBestGreedySplit :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Maybe (Expr Bool) Source #
data NumExpr Source #
Unifies non-nullable and nullable Double expressions for feature generation.
numExprCols :: NumExpr -> [Text] Source #
numericConditions :: TreeConfig -> DataFrame -> [Expr Bool] Source #
generateNumericConds :: TreeConfig -> DataFrame -> [Expr Bool] Source #
numericExprsWithTerms :: SynthConfig -> DataFrame -> [NumExpr] Source #
numericCols :: DataFrame -> [NumExpr] Source #
numericExprs :: SynthConfig -> DataFrame -> [NumExpr] -> Int -> Int -> [NumExpr] Source #
generateConditionsOld :: TreeConfig -> DataFrame -> [Expr Bool] Source #
calculateGini :: Columnable a => Text -> DataFrame -> Double Source #
majorityValue :: Columnable a => Text -> DataFrame -> a Source #
buildTree :: Columnable a => TreeConfig -> Int -> Text -> [Expr Bool] -> DataFrame -> Expr a Source #
findBestSplit :: Columnable a => TreeConfig -> Text -> [Expr Bool] -> DataFrame -> Maybe (Expr Bool) Source #
type ProbTree a = Tree (Map a Double) Source #
A tree where each leaf stores a class-probability distribution.
probsFromIndices :: Columnable a => Text -> DataFrame -> Vector Int -> Map a Double Source #
Compute normalised class probabilities from a subset of training rows.
buildProbTree :: Columnable a => Tree a -> Text -> DataFrame -> Vector Int -> ProbTree a Source #
Annotate a fitted 'Tree a' with class distributions by routing the training data through it. The split conditions are preserved; only the leaf values change from a majority label to a probability map.
fitProbTree :: Columnable a => TreeConfig -> Expr a -> DataFrame -> Map a (Expr Double) Source #
Fit a TAO decision tree and return one Expr Double per class.
Each (c, e) pair in the result map means: evaluate e on a DataFrame
row to get the predicted probability of class c. You can insert these
as new columns with derive or evaluate them with interpret.
Example:
let pes = fitProbTree @T.Text cfg (Col "species") trainDf
-- pes M.! "setosa" :: Expr Double
df' = M.foldlWithKey' (\d cls e -> D.derive (cls <> "_prob") e d) testDf pes
probExprs :: Columnable a => ProbTree a -> Map a (Expr Double) Source #
Convert a ProbTree into one 'Expr Double' per class.
Each (c, e) pair means: evaluate e on a DataFrame row to get the
predicted probability of class c. You can insert these as new columns
with derive or evaluate them with interpret.
Example:
let pt = buildProbTree fittedTree "species" trainDf trainIdxs
    pes = probExprs pt
-- pes M.! "setosa" :: Expr Double
df' = M.foldlWithKey' (\d cls e -> D.derive (cls <> "_prob") e d) testDf pes