{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}

module DataFrame.Synthesis where

import qualified DataFrame.Functions as F
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (
    DataFrame (..),
 )
import DataFrame.Internal.Expression (
    Expr (..),
    eSize,
 )
import DataFrame.Internal.Interpreter (interpret)
import DataFrame.Internal.Statistics
import DataFrame.Operations.Core (columnAsDoubleVector)
import qualified DataFrame.Operations.Statistics as Stats
import DataFrame.Operations.Subset (exclude)

import Control.Exception (throw)
import Data.Containers.ListUtils
import Data.Function
import qualified Data.List as L
import qualified Data.Map as M
import Data.Maybe (listToMaybe)
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Type.Equality
import qualified Data.Vector.Unboxed as VU
import qualified DataFrame.Operations.Core as D
import DataFrame.Operators
import Debug.Trace (trace)
import Type.Reflection (typeRep)

-- | Build the pool of candidate boolean conditions for a search round.
--
-- Given the existing conditions @conds@ and the numeric programs @ps@, the
-- pool consists of, in order:
--
--   * every existing condition;
--   * @p .<= q@ for each non-literal program @p@ and each program @q@ that is
--     not equal to @p@;
--   * the negation of every existing condition;
--   * the conjunction, then the disjunction, of each freshly built condition
--     with each existing condition it is not equal to.
--
-- The pool is run through 'deduplicate' against @df@ and the surviving
-- (expression, column) pairs are passed, with @labels@, to @pickTopNBool@,
-- whose result is returned.
generateConditions ::
    TypedColumn Double -> [Expr Bool] -> [Expr Double] -> DataFrame -> [Expr Bool]
generateConditions labels conds ps df =
    pickTopNBool df labels (deduplicate df candidates)
  where
    -- Threshold-style comparisons; the left-hand side is never a literal.
    comparisons :: [Expr Bool]
    comparisons =
        [ lhs .<= rhs
        | lhs <- ps
        , not (isLiteral lhs)
        , rhs <- ps
        , lhs /= rhs
        ]

    negations :: [Expr Bool]
    negations = map F.not conds

    -- Conditions that did not exist before this round.
    fresh :: [Expr Bool]
    fresh = comparisons ++ negations

    -- Combine every fresh condition with every distinct existing one.
    pairUp :: (Expr Bool -> Expr Bool -> Expr Bool) -> [Expr Bool]
    pairUp combine =
        [ combine new old
        | new <- fresh
        , old <- conds
        , new /= old
        ]

    candidates :: [Expr Bool]
    candidates = conds ++ fresh ++ pairUp (.&&) ++ pairUp (.||)

-- | Enumerate the next generation of candidate numeric programs.
--
-- Arguments, in order: whether conditional constructs may be generated, the
-- boolean conditions available for @ifThenElse@, the variable expressions,
-- the constant expressions, and the programs kept from the previous round.
--
-- With no previous programs the result is just the variables followed by the
-- constants. Otherwise the result is, in order:
--
--   1. the existing pool (previous programs, variables, constants);
--   2. unary transforms of every non-conditional previous program/variable;
--   3. integer powers 2 to 6 of every non-conditional pool member;
--   4. sums, then differences, of pool pairs;
--   5. when conditionals are enabled: pairwise @min@, pairwise @max@ and
--      @ifThenElse@ over every condition and pair of distinct branches;
--   6. products, then quotients, of pool pairs.
--
-- Binary combinations never pair two literals and never use an operand that
-- is itself a conditional.
generatePrograms ::
    Bool ->
    [Expr Bool] ->
    [Expr Double] ->
    [Expr Double] ->
    [Expr Double] ->
    [Expr Double]
generatePrograms _ _ vars' constants [] = vars' ++ constants
generatePrograms includeConds conds vars constants ps =
    concat
        [ pool
        , unaryTerms
        , powerTerms
        , [p + q | (p, q) <- pairsWhere (>=)]
        , [p - q | (p, q) <- pairsWhere (/=)]
        , if includeConds then branchingTerms else []
        , [p * q | (p, q) <- pairsWhere (>=)]
        , quotientTerms
        ]
  where
    -- Everything that already exists and can serve as an operand.
    pool :: [Expr Double]
    pool = ps ++ vars ++ constants

    -- Pool members tagged with their position, used to prune symmetric or
    -- self pairs by index.
    indexed :: [(Integer, Expr Double)]
    indexed = zip [0 ..] pool

    -- Two operands may be combined unless both are literals or either one is
    -- a conditional.
    mixable :: Expr Double -> Expr Double -> Bool
    mixable p q =
        Prelude.not (isLiteral p && isLiteral q)
            && Prelude.not (isConditional p)
            && Prelude.not (isConditional q)

    -- Combinable operand pairs whose pool indices satisfy the given relation,
    -- in row-major order over the pool.
    pairsWhere :: (Integer -> Integer -> Bool) -> [(Expr Double, Expr Double)]
    pairsWhere related =
        [ (p, q)
        | (i, p) <- indexed
        , (j, q) <- indexed
        , mixable p q
        , i `related` j
        ]

    unaryOps :: [Expr Double -> Expr Double]
    unaryOps = [sqrt, abs, log . (+ Lit 1), exp, sin, cos, F.relu, signum]

    -- Constants are deliberately left out of the unary transforms.
    unaryTerms :: [Expr Double]
    unaryTerms =
        [ op p
        | p <- filter (Prelude.not . isConditional) (ps ++ vars)
        , op <- unaryOps
        ]

    powerTerms :: [Expr Double]
    powerTerms =
        [ F.pow p k
        | p <- pool
        , Prelude.not (isConditional p)
        , k <- [2 .. 6]
        ]

    branchingTerms :: [Expr Double]
    branchingTerms =
        [F.min p q | (p, q) <- pairsWhere (>), p /= q]
            ++ [F.max p q | (p, q) <- pairsWhere (>), p /= q]
            ++ [ F.ifThenElse cond r s
               | cond <- conds
               , r <- pool
               , s <- pool
               , Prelude.not (isConditional r || isConditional s)
               , r /= s
               ]

    -- Division is order-sensitive, so every ordered pair of distinct
    -- expressions is considered.
    quotientTerms :: [Expr Double]
    quotientTerms =
        [ p / q
        | p <- pool
        , q <- pool
        , mixable p q
        , p /= q
        ]

-- | 'True' exactly when the expression's outermost constructor is 'Lit'.
isLiteral :: Expr a -> Bool
isLiteral expr = case expr of
    Lit _ -> True
    _ -> False

-- | 'True' exactly when the expression's outermost constructor is 'If'.
isConditional :: Expr a -> Bool
isConditional expr = case expr of
    If{} -> True
    _ -> False

-- | Evaluate each expression against the frame and keep one expression per
-- distinct resulting column.
--
-- Expressions are first sorted by ascending 'eSize' and stripped of equal
-- duplicates, then visited in that order. An expression is dropped when its
-- column is an unboxed @Double@ column containing a NaN or an infinity, or
-- when an earlier expression already produced an equal column. Each survivor
-- is returned together with its evaluated column.
--
-- If 'interpret' fails for an expression, its exception is thrown when that
-- expression's column is demanded.
deduplicate ::
    forall a.
    (Columnable a) =>
    DataFrame ->
    [Expr a] ->
    [(Expr a, TypedColumn a)]
deduplicate df = collect S.empty . nubOrd . L.sortOn eSize
  where
    collect :: S.Set (TypedColumn a) -> [Expr a] -> [(Expr a, TypedColumn a)]
    collect _ [] = []
    collect visited (expr : rest)
        | containsNonFinite column = collect visited rest
        | column `S.member` visited = collect visited rest
        | otherwise = (expr, column) : collect (S.insert column visited) rest
      where
        column :: TypedColumn a
        column = either throw id (interpret @a df expr)

    -- Only unboxed columns whose element type is Double are inspected; every
    -- other column shape is treated as valid.
    containsNonFinite :: TypedColumn a -> Bool
    containsNonFinite column = case column of
        TColumn (UnboxedColumn (values :: VU.Vector b)) ->
            case testEquality (typeRep @Double) (typeRep @b) of
                Just Refl -> VU.any (\v -> isNaN v || isInfinite v) values
                Nothing -> False
        _ -> False

-- | Check whether two programs evaluate to equal columns on the given frame.
--
-- Both expressions are interpreted against @df@ and the resulting columns are
-- compared with '=='. If either interpretation fails, the first failure's
-- exception is thrown.
equivalent :: DataFrame -> Expr Double -> Expr Double -> Bool
equivalent df p1 p2 = either throw id $ do
    lhs <- interpret df p1
    rhs <- interpret df p2
    pure (lhs == rhs)

-- | Search for an expression over the non-target columns using a
-- caller-supplied beam configuration.
--
-- The target column is interpreted as a @Double@ column (an interpretation
-- failure is thrown as an exception), removed from the frame, and
-- @beamSearch@ is started with the 'percentiles' constants of the remaining
-- columns and with no initial conditions or programs. Returns
-- @Left "No programs found"@ when the search yields nothing.
synthesizeFeatureExpr ::
    -- | Name of the target column
    T.Text ->
    BeamConfig ->
    DataFrame ->
    Either String (Expr Double)
synthesizeFeatureExpr target cfg df =
    maybe (Left "No programs found") Right $
        beamSearch features cfg targetColumn (percentiles features) [] []
  where
    -- The frame without the target column.
    features :: DataFrame
    features = exclude [target] df

    targetColumn :: TypedColumn Double
    targetColumn = either throw id (interpret df (Col target))

-- | F1 score of numeric predictions against numeric ground truth.
--
-- The first argument holds the true values and the second the predictions.
-- Any value strictly greater than zero counts as positive, everything else
-- as negative. The vectors are paired element-wise (extra elements of the
-- longer one are ignored), the true-positive, false-positive and
-- false-negative counts are accumulated strictly, and the result is
-- delegated to 'f1FromCounts'.
f1FromBinary :: VU.Vector Double -> VU.Vector Double -> Maybe Double
f1FromBinary trues preds = f1FromCounts tp fp fn
  where
    (!tp, !fp, !fn) = VU.foldl' tally (0, 0, 0) (VU.zip preds trues)

    tally :: (Int, Int, Int) -> (Double, Double) -> (Int, Int, Int)
    tally (!hit, !falseAlarm, !miss) (!predicted, !actual)
        | predictedPos && actualPos = (hit + 1, falseAlarm, miss)
        | predictedPos = (hit, falseAlarm + 1, miss)
        | actualPos = (hit, falseAlarm, miss + 1)
        | otherwise = (hit, falseAlarm, miss)
      where
        predictedPos = predicted > 0
        actualPos = actual > 0

-- | F1 score from true-positive, false-positive and false-negative counts.
--
-- Precision is @tp / (tp + fp)@ and recall is @tp / (tp + fn)@, each taken
-- to be 0 when its denominator is 0. Returns 'Nothing' when precision and
-- recall sum to 0 (the harmonic mean is undefined), otherwise
-- @Just (2 * precision * recall / (precision + recall))@.
f1FromCounts :: Int -> Int -> Int -> Maybe Double
f1FromCounts tp fp fn
    | precision + recall == 0 = Nothing
    | otherwise = Just (2 * precision * recall / (precision + recall))
  where
    hits :: Double
    hits = fromIntegral tp

    precision :: Double
    precision = hits `over` (hits + fromIntegral fp)

    recall :: Double
    recall = hits `over` (hits + fromIntegral fn)

    -- Division that yields 0 instead of dividing by zero.
    over :: Double -> Double -> Double
    over numerator denominator
        | denominator == 0 = 0
        | otherwise = numerator / denominator

-- | Fit a binary classifier expression for the target column by beam search.
--
-- The target column is interpreted as a @Double@ column (an interpretation
-- failure is thrown as an exception) and removed from the frame. The search
-- runs with a 'BeamConfig' built from the given depth and beam size, the
-- @F1@ loss and a final flag of 'True', seeded with the 'percentiles'
-- constants of the remaining columns plus the literals 1, 0 and -1. The
-- winning numeric program @p@ is turned into a class label: 1 where
-- @p .> 0@, otherwise 0. Returns @Left "No programs found"@ when the search
-- yields nothing.
fitClassifier ::
    -- | Name of the target column
    T.Text ->
    -- | Depth of search (roughly, how many terms in the final expression)
    Int ->
    -- | Beam size: the number of candidate expressions to consider at a time
    Int ->
    DataFrame ->
    Either String (Expr Int)
fitClassifier target d b df =
    case beamSearch features config labels constants [] [] of
        Just p -> Right (F.ifThenElse (p .> 0) 1 0)
        Nothing -> Left "No programs found"
  where
    -- The frame without the target column.
    features :: DataFrame
    features = exclude [target] df

    labels :: TypedColumn Double
    labels = either throw id (interpret df (Col target))

    config :: BeamConfig
    config = BeamConfig d b F1 True

    constants :: [Expr Double]
    constants = percentiles features ++ map Lit [1, 0, -1]

-- | Literal constants derived from the distribution of every column.
--
-- Each column of the frame is read as a vector of doubles (a failure to do
-- so is thrown as an exception). The result lists, in order: the 1st, 25th,
-- 75th and 99th percentile of each column; the variance of each column; and
-- the square root of the variance of each column. Every value is rounded to
-- two significant digits and wrapped in 'Lit'.
percentiles :: DataFrame -> [Expr Double]
percentiles df =
    [constant (percentile' q column) | column <- doubleColumns, q <- [1, 25, 75, 99]]
        ++ [constant (variance' column) | column <- doubleColumns]
        ++ [constant (sqrt (variance' column)) | column <- doubleColumns]
  where
    constant :: Double -> Expr Double
    constant = Lit . roundTo2SigDigits

    doubleColumns :: [VU.Vector Double]
    doubleColumns =
        [ either throw id (columnAsDoubleVector (Col @Double name) df)
        | name <- D.columnNames df
        ]

-- | Round a number to @n@ significant decimal digits.
--
-- Zero is returned as 0. NaN and infinite inputs are returned unchanged:
-- feeding them to 'floor' or 'round' gives meaningless integers, so the
-- rounding formula cannot be applied to them. The input is likewise returned
-- unchanged when the power-of-ten scale factor overflows to infinity or
-- underflows to zero (extremely small inputs or unreasonable @n@), since
-- scaling would then produce NaN or infinity rather than a rounded value.
--
-- Otherwise the value is multiplied by @10 ^^ (n - 1 - magnitude)@, where
-- @magnitude@ is the decimal exponent of its absolute value, rounded to the
-- nearest integer, and divided by the same factor.
roundToSigDigits :: Int -> Double -> Double
roundToSigDigits n x
    | x == 0 = 0
    | isNaN x || isInfinite x = x
    | isInfinite scale || scale == 0 = x
    | otherwise = fromIntegral (round (x * scale) :: Integer) / scale
  where
    -- Decimal exponent of the leading digit of x; only demanded for finite,
    -- non-zero x.
    magnitude :: Int
    magnitude = floor (logBase 10 (abs x))

    -- Factor that shifts the n-th significant digit into the units place.
    scale :: Double
    scale = 10 ** fromIntegral (n - 1 - magnitude)

-- | Round a number to two significant decimal digits; see 'roundToSigDigits'.
roundTo2SigDigits :: Double -> Double
roundTo2SigDigits x = roundToSigDigits 2 x

{- | Search for an expression over the other columns that approximates a
target column, scoring candidates with 'MeanSquaredError' and allowing
conditional expressions.

The target column is removed from the frame handed to 'beamSearch'. The
search is seeded with the 'percentiles' of the remaining columns, the mean of
the target, and the first six powers of @10@, @1@ and @0.1@.

Returns @Left "No programs found"@ when the search yields nothing. If the
target column cannot be interpreted, the underlying exception is thrown.
-}
fitRegression ::
    -- | Name of the target column
    T.Text ->
    -- | Depth of search (Roughly, how many terms in the final expression)
    Int ->
    -- | Beam size - the number of candidate expressions to consider at a time.
    Int ->
    DataFrame ->
    Either String (Expr Double)
fitRegression target d b df =
    maybe (Left "No programs found") Right $
        beamSearch features config labels seeds [] []
  where
    -- Every column except the one being predicted.
    features :: DataFrame
    features = exclude [target] df

    -- The values the synthesised expression should reproduce.
    labels :: TypedColumn Double
    labels = either throw id (interpret df (Col target))

    config :: BeamConfig
    config =
        BeamConfig
            { searchDepth = d
            , beamLength = b
            , lossFunction = MeanSquaredError
            , includeConditionals = True
            }

    meanOfTarget :: Double
    meanOfTarget = Stats.mean (Col @Double target) df

    powerConstants :: [Expr Double]
    powerConstants =
        [ F.pow base k
        | k <- [1 .. 6]
        , base <- [Lit 10, Lit 1, Lit 0.1]
        ]

    seeds :: [Expr Double]
    seeds = concat [percentiles features, [Lit meanOfTarget], powerConstants]

-- | Scoring criteria for candidate expressions; 'getLossFunction' maps each
-- constructor to the function that computes the score.
data LossFunction
    = PearsonCorrelation
    -- ^ Scored by the square of @correlation'@.
    | MutualInformation
    -- ^ Scored by @mutualInformationBinned@ with at least 10 bins.
    | MeanSquaredError
    -- ^ Scored by the negated @meanSquaredError@.
    | F1
    -- ^ Scored by @f1FromBinary@.

{- | Select the scoring function for a 'LossFunction'.

The returned function takes two vectors and yields a score, or whatever
'Nothing' the underlying statistic produces.

* 'MutualInformation': binned mutual information, using
  @max 10 (ceiling (sqrt n))@ bins where @n@ is the length of the first vector.
* 'PearsonCorrelation': the square of @correlation'@.
* 'MeanSquaredError': the negated mean squared error.
* 'F1': @f1FromBinary@.
-}
getLossFunction ::
    LossFunction -> (VU.Vector Double -> VU.Vector Double -> Maybe Double)
getLossFunction MutualInformation = \xs ys ->
    let binCount = Prelude.max 10 (ceiling (sqrt (fromIntegral (VU.length xs))))
     in mutualInformationBinned binCount xs ys
getLossFunction PearsonCorrelation = \xs ys -> fmap (^ 2) (correlation' xs ys)
getLossFunction MeanSquaredError = \xs ys -> negate <$> meanSquaredError xs ys
getLossFunction F1 = f1FromBinary

-- | Parameters controlling 'beamSearch'.
data BeamConfig = BeamConfig
    { searchDepth :: Int
    -- ^ Number of expansion rounds left; 'beamSearch' decrements it each round.
    , beamLength :: Int
    -- ^ Number of best-scoring candidates 'pickTopN' keeps per round.
    , lossFunction :: LossFunction
    -- ^ Criterion used to score candidates (see 'getLossFunction').
    , includeConditionals :: Bool
    -- ^ Flag forwarded to @generatePrograms@.
    }

-- | Default search settings: depth 2, beam of 100 candidates, scoring by
-- 'PearsonCorrelation', conditionals disabled.
defaultBeamConfig :: BeamConfig
defaultBeamConfig =
    BeamConfig
        { searchDepth = 2
        , beamLength = 100
        , lossFunction = PearsonCorrelation
        , includeConditionals = False
        }

{- | Beam search over candidate expressions.

Each round deduplicates the current programs, keeps the best-scoring ones
('pickTopN'), and expands them with @generatePrograms@ using the frame's
columns, the supplied constants and freshly generated conditions. When the
remaining depth is exhausted, the best surviving program is returned, or
'Nothing' if there is none.

A non-positive 'searchDepth' ends the search immediately; a negative depth
therefore no longer recurses forever.
-}
beamSearch ::
    DataFrame ->
    -- | Parameters of the beam search.
    BeamConfig ->
    -- | Examples
    TypedColumn Double ->
    -- | Constants
    [Expr Double] ->
    -- | Conditions
    [Expr Bool] ->
    -- | Programs
    [Expr Double] ->
    Maybe (Expr Double)
beamSearch df cfg outputs constants conds programs
    | searchDepth cfg <= 0 = listToMaybe survivors
    | otherwise =
        beamSearch
            df
            cfg{searchDepth = searchDepth cfg - 1}
            outputs
            constants
            conditions
            ( generatePrograms
                (includeConditionals cfg)
                conditions
                vars
                constants
                survivors
            )
  where
    -- Column names ordered by their index in the frame.
    names :: [T.Text]
    names = map fst . L.sortOn snd . M.toList . columnIndices $ df

    -- Every column is offered to the search as a Double-valued variable.
    vars :: [Expr Double]
    vars = map Col names

    conditions :: [Expr Bool]
    conditions = generateConditions outputs conds (vars ++ constants) df

    -- Best candidates of this round, best first.
    survivors :: [Expr Double]
    survivors = pickTopN df outputs cfg (deduplicate df programs)

{- | Keep the 'beamLength' best-scoring candidates, best first.

Each candidate's evaluated column is scored against the target column with
the configured 'lossFunction'; higher scores rank first. Scores that are
missing, NaN or infinite rank last. Previously such scores made the sort
comparator inconsistent, which could leave a worse candidate ahead of a
better one. Ties keep their input order.

As a debugging aid, the best candidate and its score (recomputed by
interpreting it against the frame) are emitted via 'trace'. Conversion or
interpretation failures are thrown as exceptions.
-}
pickTopN ::
    DataFrame ->
    TypedColumn Double ->
    BeamConfig ->
    [(Expr Double, TypedColumn a)] ->
    [Expr Double]
pickTopN _ _ _ [] = []
pickTopN df (TColumn col) cfg ps =
    trace
        ( "Best loss: "
            ++ show (score . interpretDoubleVector <$> listToMaybe ordered)
            ++ " "
            ++ (if null ordered then "empty" else show (listToMaybe ordered))
        )
        ordered
  where
    -- Target values every candidate is compared with.
    target :: VU.Vector Double
    target = either throw id (toVector @Double @VU.Vector col)

    score :: VU.Vector Double -> Maybe Double
    score = getLossFunction (lossFunction cfg) target

    -- Treat NaN / infinite scores like a missing score so the ordering below
    -- is total; 'Nothing' compares below every 'Just'.
    rankingKey :: Maybe Double -> Maybe Double
    rankingKey s = case s of
        Just v | isNaN v || isInfinite v -> Nothing
        _ -> s

    ordered :: [Expr Double]
    ordered =
        Prelude.take (beamLength cfg)
            . map fst
            . L.sortBy (flip compare `on` snd)
            $ [(e, rankingKey (score (columnDoubles res))) | (e, res) <- ps]

    columnDoubles :: TypedColumn b -> VU.Vector Double
    columnDoubles (TColumn c) = either throw id (toVector @Double @VU.Vector c)

    interpretDoubleVector :: Expr Double -> VU.Vector Double
    interpretDoubleVector = columnDoubles . either throw id . interpret df

{- | Keep the ten boolean candidates that score best against the target.

Each candidate's evaluated column is converted to 0/1 doubles and scored
against the target column with the 'MutualInformation' criterion; higher
scores rank first. Scores that are missing, NaN or infinite rank last.
Previously such scores made the sort comparator inconsistent, so the
resulting order was unreliable. Ties keep their input order.

The 'DataFrame' argument is not used. Conversion failures are thrown as
exceptions.
-}
pickTopNBool ::
    DataFrame ->
    TypedColumn Double ->
    [(Expr Bool, TypedColumn Bool)] ->
    [Expr Bool]
pickTopNBool _ _ [] = []
pickTopNBool _ (TColumn col) ps =
    Prelude.take maxConditions
        . map fst
        . L.sortBy (flip compare `on` snd)
        $ [(e, rankingKey (score (boolsAsDoubles res))) | (e, res) <- ps]
  where
    -- Number of conditions that survive the selection.
    maxConditions :: Int
    maxConditions = 10

    -- Target values every candidate is compared with.
    target :: VU.Vector Double
    target = either throw id (toVector @Double @VU.Vector col)

    score :: VU.Vector Double -> Maybe Double
    score = getLossFunction MutualInformation target

    -- Treat NaN / infinite scores like a missing score so the ordering is
    -- total; 'Nothing' compares below every 'Just'.
    rankingKey :: Maybe Double -> Maybe Double
    rankingKey s = case s of
        Just v | isNaN v || isInfinite v -> Nothing
        _ -> s

    -- False becomes 0 and True becomes 1.
    boolsAsDoubles :: TypedColumn Bool -> VU.Vector Double
    boolsAsDoubles (TColumn c) =
        VU.map
            (fromIntegral @Int @Double . fromEnum)
            (either throw id (toVector @Bool @VU.Vector c))

-- | Whether evaluating the expression against the frame yields exactly the
-- given column. An interpretation failure is thrown as an exception.
satisfiesExamples :: DataFrame -> TypedColumn Double -> Expr Double -> Bool
satisfiesExamples df col expr = evaluated == col
  where
    evaluated :: TypedColumn Double
    evaluated = either throw id (interpret df expr)