{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
module DataFrame.Functions where
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame (..), unsafeGetColumn)
import DataFrame.Internal.Expression (
Expr (..),
UExpr (..),
eSize,
interpret,
replaceExpr,
)
import DataFrame.Internal.Statistics
import qualified DataFrame.Operations.Statistics as Stats
import DataFrame.Operations.Subset (exclude, select)
import Control.Exception (throw)
import Control.Monad
import qualified Data.Char as Char
import Data.Function
import qualified Data.List as L
import qualified Data.Map as M
import Data.Maybe (fromMaybe, listToMaybe)
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Type.Equality
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import qualified DataFrame.Operations.Transformations as D
import Debug.Trace (trace, traceShow)
import Language.Haskell.TH
import qualified Language.Haskell.TH.Syntax as TH
import Type.Reflection (typeRep)
import Prelude hiding (maximum, minimum, sum)
name :: (Show a) => Expr a -> T.Text
name :: forall a. Show a => Expr a -> Text
name (Col Text
n) = Text
n
name Expr a
other =
[Char] -> Text
forall a. HasCallStack => [Char] -> a
error ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$
[Char]
"You must call `name` on a column reference. Not the expression: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Expr a -> [Char]
forall a. Show a => a -> [Char]
show Expr a
other
col :: (Columnable a) => T.Text -> Expr a
col :: forall a. Columnable a => Text -> Expr a
col = Text -> Expr a
forall a. Columnable a => Text -> Expr a
Col
as :: (Columnable a) => Expr a -> T.Text -> (T.Text, UExpr)
as :: forall a. Columnable a => Expr a -> Text -> (Text, UExpr)
as Expr a
expr Text
name = (Text
name, Expr a -> UExpr
forall a. Columnable a => Expr a -> UExpr
Wrap Expr a
expr)
ifThenElse :: (Columnable a) => Expr Bool -> Expr a -> Expr a -> Expr a
ifThenElse :: forall a. Columnable a => Expr Bool -> Expr a -> Expr a -> Expr a
ifThenElse = Expr Bool -> Expr a -> Expr a -> Expr a
forall a. Columnable a => Expr Bool -> Expr a -> Expr a -> Expr a
If
lit :: (Columnable a) => a -> Expr a
lit :: forall a. Columnable a => a -> Expr a
lit = a -> Expr a
forall a. Columnable a => a -> Expr a
Lit
lift :: (Columnable a, Columnable b) => (a -> b) -> Expr a -> Expr b
lift :: forall a b.
(Columnable a, Columnable b) =>
(a -> b) -> Expr a -> Expr b
lift = Text -> (a -> b) -> Expr a -> Expr b
forall a b.
(Columnable a, Columnable b) =>
Text -> (b -> a) -> Expr b -> Expr a
UnaryOp Text
"udf"
lift2 ::
(Columnable c, Columnable b, Columnable a) =>
(c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 :: forall c b a.
(Columnable c, Columnable b, Columnable a) =>
(c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 = Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"udf"
(==) :: (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
== :: forall a. (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
(==) = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"eq" a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(Prelude.==)
eq :: (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq :: forall a. (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"eq" a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(Prelude.==)
(<) :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
< :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
(<) = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"lt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.<)
lt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"lt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.<)
(>) :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
> :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
(>) = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"gt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.>)
gt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"gt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.>)
(<=) :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
<= :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
(<=) = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"leq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.<=)
leq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
leq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
leq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"leq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.<=)
(>=) :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
>= :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
(>=) = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"geq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.>=)
geq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
geq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
geq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"geq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(Prelude.>=)
and :: Expr Bool -> Expr Bool -> Expr Bool
and :: Expr Bool -> Expr Bool -> Expr Bool
and = Text
-> (Bool -> Bool -> Bool) -> Expr Bool -> Expr Bool -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"and" Bool -> Bool -> Bool
(&&)
or :: Expr Bool -> Expr Bool -> Expr Bool
or :: Expr Bool -> Expr Bool -> Expr Bool
or = Text
-> (Bool -> Bool -> Bool) -> Expr Bool -> Expr Bool -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"or" Bool -> Bool -> Bool
(||)
not :: Expr Bool -> Expr Bool
not :: Expr Bool -> Expr Bool
not = Text -> (Bool -> Bool) -> Expr Bool -> Expr Bool
forall a b.
(Columnable a, Columnable b) =>
Text -> (b -> a) -> Expr b -> Expr a
UnaryOp Text
"not" Bool -> Bool
Prelude.not
count :: (Columnable a) => Expr a -> Expr Int
count :: forall a. Columnable a => Expr a -> Expr Int
count Expr a
expr = Expr a -> Text -> Int -> (Int -> a -> Int) -> Expr Int
forall a b.
(Columnable a, Columnable b) =>
Expr b -> Text -> a -> (a -> b -> a) -> Expr a
AggFold Expr a
expr Text
"foldUdf" Int
0 (\Int
acc a
_ -> Int
acc Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
minimum :: (Columnable a, Ord a) => Expr a -> Expr a
minimum :: forall a. (Columnable a, Ord a) => Expr a -> Expr a
minimum Expr a
expr = Expr a
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Expr a
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
AggReduce Expr a
expr Text
"minimum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
Prelude.min
maximum :: (Columnable a, Ord a) => Expr a -> Expr a
maximum :: forall a. (Columnable a, Ord a) => Expr a -> Expr a
maximum Expr a
expr = Expr a
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Expr a
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
AggReduce Expr a
expr Text
"maximum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
Prelude.max
sum :: forall a. (Columnable a, Num a, VU.Unbox a) => Expr a -> Expr a
sum :: forall a. (Columnable a, Num a, Unbox a) => Expr a -> Expr a
sum Expr a
expr = Expr a -> Text -> (Vector a -> a) -> Expr a
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Expr b -> Text -> (Vector b -> a) -> Expr a
AggNumericVector Expr a
expr Text
"sum" Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum
mean :: (Columnable a, Real a, VU.Unbox a) => Expr a -> Expr Double
mean :: forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
mean Expr a
expr = Expr a -> Text -> (Vector a -> Double) -> Expr Double
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Expr b -> Text -> (Vector b -> a) -> Expr a
AggNumericVector Expr a
expr Text
"mean" Vector a -> Double
forall a. (Real a, Unbox a) => Vector a -> Double
mean'
median :: Expr Double -> Expr Double
median :: Expr Double -> Expr Double
median Expr Double
expr = Expr Double -> Text -> (Vector Double -> Double) -> Expr Double
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Expr b -> Text -> (Vector b -> a) -> Expr a
AggNumericVector Expr Double
expr Text
"mean" Vector Double -> Double
forall a. (Real a, Unbox a) => Vector a -> Double
median'
percentile :: Int -> Expr Double -> Expr Double
percentile :: Int -> Expr Double -> Expr Double
percentile Int
n Expr Double
expr =
Expr Double -> Text -> (Vector Double -> Double) -> Expr Double
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Expr b -> Text -> (Vector b -> a) -> Expr a
AggNumericVector
Expr Double
expr
([Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ [Char]
"percentile " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
n)
((Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
0) (Vector Double -> Double)
-> (Vector Double -> Vector Double) -> Vector Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> Int -> Vector Double -> Vector Double
quantiles' ([Int] -> Vector Int
forall a. Unbox a => [a] -> Vector a
VU.fromList [Int
n]) Int
100)
stddev :: (Columnable a, Real a, VU.Unbox a) => Expr a -> Expr Double
stddev :: forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
stddev Expr a
expr = Expr a -> Text -> (Vector a -> Double) -> Expr Double
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Expr b -> Text -> (Vector b -> a) -> Expr a
AggNumericVector Expr a
expr Text
"stddev" (Double -> Double
forall a. Floating a => a -> a
sqrt (Double -> Double) -> (Vector a -> Double) -> Vector a -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector a -> Double
forall a. (Real a, Unbox a) => Vector a -> Double
variance')
zScore :: Expr Double -> Expr Double
zScore :: Expr Double -> Expr Double
zScore Expr Double
c = (Expr Double
c Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
- Expr Double -> Expr Double
forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
mean Expr Double
c) Expr Double -> Expr Double -> Expr Double
forall a. Fractional a => a -> a -> a
/ Expr Double -> Expr Double
forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
stddev Expr Double
c
pow :: (Columnable a, Num a) => Int -> Expr a -> Expr a
pow :: forall a. (Columnable a, Num a) => Int -> Expr a -> Expr a
pow Int
i = Text -> (a -> a) -> Expr a -> Expr a
forall a b.
(Columnable a, Columnable b) =>
Text -> (b -> a) -> Expr b -> Expr a
UnaryOp (Text
"pow " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack (Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i)) (a -> Int -> a
forall a b. (Num a, Integral b) => a -> b -> a
^ Int
i)
relu :: (Columnable a, Num a) => Expr a -> Expr a
relu :: forall a. (Columnable a, Num a) => Expr a -> Expr a
relu = Text -> (a -> a) -> Expr a -> Expr a
forall a b.
(Columnable a, Columnable b) =>
Text -> (b -> a) -> Expr b -> Expr a
UnaryOp Text
"relu" (a -> a -> a
forall a. Ord a => a -> a -> a
Prelude.max a
0)
min :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
min :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
min = Text -> (a -> a -> a) -> Expr a -> Expr a -> Expr a
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"min" a -> a -> a
forall a. Ord a => a -> a -> a
Prelude.min
max :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
max :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
max = Text -> (a -> a -> a) -> Expr a -> Expr a -> Expr a
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinaryOp Text
"max" a -> a -> a
forall a. Ord a => a -> a -> a
Prelude.max
reduce ::
forall a b.
(Columnable a, Columnable b) => Expr b -> a -> (a -> b -> a) -> Expr a
reduce :: forall a b.
(Columnable a, Columnable b) =>
Expr b -> a -> (a -> b -> a) -> Expr a
reduce Expr b
expr = Expr b -> Text -> a -> (a -> b -> a) -> Expr a
forall a b.
(Columnable a, Columnable b) =>
Expr b -> Text -> a -> (a -> b -> a) -> Expr a
AggFold Expr b
expr Text
"foldUdf"
generatePrograms :: [Expr Double] -> [Expr Double] -> [Expr Double]
generatePrograms :: [Expr Double] -> [Expr Double] -> [Expr Double]
generatePrograms [Expr Double]
vars [] =
[Expr Double]
vars
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double -> Expr Double
transform Expr Double
p
| Expr Double
p <- [Expr Double]
vars
, Expr Double -> Expr Double
transform <-
[ Expr Double -> Expr Double
zScore
, Expr Double -> Expr Double
forall a. Num a => a -> a
abs
, Expr Double -> Expr Double
forall a. Floating a => a -> a
sqrt
, Expr Double -> Expr Double
forall a. Floating a => a -> a
log (Expr Double -> Expr Double)
-> (Expr Double -> Expr Double) -> Expr Double -> Expr Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
+ Double -> Expr Double
forall a. Columnable a => a -> Expr a
Lit Double
1)
, Expr Double -> Expr Double
forall a. Floating a => a -> a
exp
, Expr Double -> Expr Double
forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
mean
, Expr Double -> Expr Double
median
, Expr Double -> Expr Double
forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
stddev
, Expr Double -> Expr Double
forall a. Floating a => a -> a
sin
, Expr Double -> Expr Double
forall a. Floating a => a -> a
cos
, Expr Double -> Expr Double
forall a. (Columnable a, Num a) => Expr a -> Expr a
relu
, Expr Double -> Expr Double
forall a. Num a => a -> a
signum
]
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Int -> Expr Double -> Expr Double
forall a. (Columnable a, Num a) => Int -> Expr a -> Expr a
pow Int
i Expr Double
p
| Expr Double
p <- [Expr Double]
vars
, Int
i <- [Int
2 .. Int
6]
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
+ Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.> Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double -> Expr Double -> Expr Double
forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
DataFrame.Functions.min Expr Double
p Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.> Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double -> Expr Double -> Expr Double
forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
DataFrame.Functions.max Expr Double
p Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.>= Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
- Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
/= Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
* Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.>= Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Fractional a => a -> a -> a
/ Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
vars
, Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
/= Integer
j
]
generatePrograms [Expr Double]
vars [Expr Double]
ps =
let
existingPrograms :: [Expr Double]
existingPrograms = [Expr Double]
vars [Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [Expr Double]
ps
in
[Expr Double]
existingPrograms
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double -> Expr Double
transform Expr Double
p
| Expr Double
p <- [Expr Double]
existingPrograms
, Expr Double -> Expr Double
transform <-
[ Expr Double -> Expr Double
zScore
, Expr Double -> Expr Double
forall a. Floating a => a -> a
sqrt
, Expr Double -> Expr Double
forall a. Num a => a -> a
abs
, Expr Double -> Expr Double
forall a. Floating a => a -> a
log (Expr Double -> Expr Double)
-> (Expr Double -> Expr Double) -> Expr Double -> Expr Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
+ Double -> Expr Double
forall a. Columnable a => a -> Expr a
Lit Double
1)
, Expr Double -> Expr Double
forall a. Floating a => a -> a
exp
, Expr Double -> Expr Double
forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
mean
, Expr Double -> Expr Double
median
, Expr Double -> Expr Double
forall a. (Columnable a, Real a, Unbox a) => Expr a -> Expr Double
stddev
, Expr Double -> Expr Double
forall a. Floating a => a -> a
sin
, Expr Double -> Expr Double
forall a. Floating a => a -> a
cos
, Expr Double -> Expr Double
forall a. (Columnable a, Num a) => Expr a -> Expr a
relu
, Expr Double -> Expr Double
forall a. Num a => a -> a
signum
]
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Int -> Expr Double -> Expr Double
forall a. (Columnable a, Num a) => Int -> Expr a -> Expr a
pow Int
i Expr Double
p
| Expr Double
p <- [Expr Double]
existingPrograms
, Int
i <- [Int
2 .. Int
6]
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
+ Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.>= Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double -> Expr Double -> Expr Double
forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
DataFrame.Functions.min Expr Double
p Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.> Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double -> Expr Double -> Expr Double
forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
DataFrame.Functions.max Expr Double
p Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.> Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Bool -> Expr Double -> Expr Double -> Expr Double
forall a. Columnable a => Expr Bool -> Expr a -> Expr a -> Expr a
ifThenElse (Expr Double
p Expr Double -> Expr Double -> Expr Bool
forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
DataFrame.Functions.>= Int -> Expr Double -> Expr Double
percentile Int
n Expr Double
p) Expr Double
p Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
/= Integer
j
, Int
n <- [Int
1, Int
25, Int
50, Int
75, Int
99]
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Bool -> Expr Double -> Expr Double -> Expr Double
forall a. Columnable a => Expr Bool -> Expr a -> Expr a -> Expr a
ifThenElse (Expr Double
p Expr Double -> Expr Double -> Expr Bool
forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
DataFrame.Functions.>= Int -> Expr Double -> Expr Double
percentile Int
n Expr Double
p) Expr Double
p Expr Double
q
| Expr Double
p <- [Expr Double]
existingPrograms
, Expr Double
q <- [Double -> Expr Double
forall a. Columnable a => a -> Expr a
lit Double
1, Double -> Expr Double
forall a. Columnable a => a -> Expr a
lit Double
0, Double -> Expr Double
forall a. Columnable a => a -> Expr a
lit (-Double
1)]
, Int
n <- [Int
1, Int
25, Int
50, Int
75, Int
99]
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
- Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
/= Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Num a => a -> a -> a
* Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, Integer
i Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
Prelude.>= Integer
j
]
[Expr Double] -> [Expr Double] -> [Expr Double]
forall a. [a] -> [a] -> [a]
++ [ Expr Double
p Expr Double -> Expr Double -> Expr Double
forall a. Fractional a => a -> a -> a
/ Expr Double
q
| (Integer
i, Expr Double
p) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, (Integer
j, Expr Double
q) <- [Integer] -> [Expr Double] -> [(Integer, Expr Double)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Integer
0 ..] [Expr Double]
existingPrograms
, Integer
i Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
/= Integer
j
]
deduplicate ::
DataFrame ->
[Expr Double] ->
[(Expr Double, TypedColumn Double)]
deduplicate :: DataFrame -> [Expr Double] -> [(Expr Double, TypedColumn Double)]
deduplicate DataFrame
df = Set (TypedColumn Double)
-> [Expr Double] -> [(Expr Double, TypedColumn Double)]
go Set (TypedColumn Double)
forall a. Set a
S.empty ([Expr Double] -> [(Expr Double, TypedColumn Double)])
-> ([Expr Double] -> [Expr Double])
-> [Expr Double]
-> [(Expr Double, TypedColumn Double)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Expr Double -> Expr Double -> Ordering)
-> [Expr Double] -> [Expr Double]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (\Expr Double
e1 Expr Double
e2 -> Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Expr Double -> Int
forall a. Expr a -> Int
eSize Expr Double
e1) (Expr Double -> Int
forall a. Expr a -> Int
eSize Expr Double
e2))
where
go :: Set (TypedColumn Double)
-> [Expr Double] -> [(Expr Double, TypedColumn Double)]
go Set (TypedColumn Double)
_ [] = []
go Set (TypedColumn Double)
seen (Expr Double
x : [Expr Double]
xs)
| Bool
hasInvalid = Set (TypedColumn Double)
-> [Expr Double] -> [(Expr Double, TypedColumn Double)]
go Set (TypedColumn Double)
seen [Expr Double]
xs
| TypedColumn Double -> Set (TypedColumn Double) -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member TypedColumn Double
res Set (TypedColumn Double)
seen = Set (TypedColumn Double)
-> [Expr Double] -> [(Expr Double, TypedColumn Double)]
go Set (TypedColumn Double)
seen [Expr Double]
xs
| Bool
otherwise = (Expr Double
x, TypedColumn Double
res) (Expr Double, TypedColumn Double)
-> [(Expr Double, TypedColumn Double)]
-> [(Expr Double, TypedColumn Double)]
forall a. a -> [a] -> [a]
: Set (TypedColumn Double)
-> [Expr Double] -> [(Expr Double, TypedColumn Double)]
go (TypedColumn Double
-> Set (TypedColumn Double) -> Set (TypedColumn Double)
forall a. Ord a => a -> Set a -> Set a
S.insert TypedColumn Double
res Set (TypedColumn Double)
seen) [Expr Double]
xs
where
res :: TypedColumn Double
res = case DataFrame
-> Expr Double -> Either DataFrameException (TypedColumn Double)
forall a.
Columnable a =>
DataFrame -> Expr a -> Either DataFrameException (TypedColumn a)
interpret DataFrame
df Expr Double
x of
Left DataFrameException
e -> DataFrameException -> TypedColumn Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right TypedColumn Double
v -> TypedColumn Double
v
hasInvalid :: Bool
hasInvalid = case TypedColumn Double
res of
(TColumn (UnboxedColumn (Vector a
col :: VU.Vector a))) -> case TypeRep Double -> TypeRep a -> Maybe (Double :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) of
Just Double :~: a
Refl -> (a -> Bool) -> Vector a -> Bool
forall a. Unbox a => (a -> Bool) -> Vector a -> Bool
VU.any (\a
n -> a -> Bool
forall a. RealFloat a => a -> Bool
isNaN a
n Bool -> Bool -> Bool
|| a -> Bool
forall a. RealFloat a => a -> Bool
isInfinite a
n) Vector a
col
Maybe (Double :~: a)
Nothing -> Bool
False
TypedColumn Double
_ -> Bool
False
equivalent :: DataFrame -> Expr Double -> Expr Double -> Bool
equivalent :: DataFrame -> Expr Double -> Expr Double -> Bool
equivalent DataFrame
df Expr Double
p1 Expr Double
p2 = case TypedColumn Double -> TypedColumn Double -> Bool
forall a. Eq a => a -> a -> Bool
(Prelude.==) (TypedColumn Double -> TypedColumn Double -> Bool)
-> Either DataFrameException (TypedColumn Double)
-> Either DataFrameException (TypedColumn Double -> Bool)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> DataFrame
-> Expr Double -> Either DataFrameException (TypedColumn Double)
forall a.
Columnable a =>
DataFrame -> Expr a -> Either DataFrameException (TypedColumn a)
interpret DataFrame
df Expr Double
p1 Either DataFrameException (TypedColumn Double -> Bool)
-> Either DataFrameException (TypedColumn Double)
-> Either DataFrameException Bool
forall a b.
Either DataFrameException (a -> b)
-> Either DataFrameException a -> Either DataFrameException b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> DataFrame
-> Expr Double -> Either DataFrameException (TypedColumn Double)
forall a.
Columnable a =>
DataFrame -> Expr a -> Either DataFrameException (TypedColumn a)
interpret DataFrame
df Expr Double
p2 of
Left DataFrameException
e -> DataFrameException -> Bool
forall a e. Exception e => e -> a
throw DataFrameException
e
Right Bool
v -> Bool
v
synthesizeFeatureExpr ::
T.Text ->
Int ->
Int ->
DataFrame ->
Either String (Expr Double)
synthesizeFeatureExpr :: Text -> Int -> Int -> DataFrame -> Either [Char] (Expr Double)
synthesizeFeatureExpr Text
target Int
d Int
b DataFrame
df =
let
df' :: DataFrame
df' = [Text] -> DataFrame -> DataFrame
exclude [Text
target] DataFrame
df
t :: TypedColumn Double
t = case DataFrame
-> Expr Double -> Either DataFrameException (TypedColumn Double)
forall a.
Columnable a =>
DataFrame -> Expr a -> Either DataFrameException (TypedColumn a)
interpret DataFrame
df (Text -> Expr Double
forall a. Columnable a => Text -> Expr a
Col Text
target) of
Left DataFrameException
e -> DataFrameException -> TypedColumn Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right TypedColumn Double
v -> TypedColumn Double
v
in
case DataFrame
-> BeamConfig
-> TypedColumn Double
-> [Expr Double]
-> Maybe (Expr Double)
beamSearch
DataFrame
df'
(Int
-> Int
-> (Vector Double -> Vector Double -> Maybe Double)
-> BeamConfig
BeamConfig Int
d Int
b (\Vector Double
l Vector Double
r -> (Double -> Integer -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ Integer
2) (Double -> Double) -> Maybe Double -> Maybe Double
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Vector Double -> Vector Double -> Maybe Double
correlation' Vector Double
l Vector Double
r))
TypedColumn Double
t
[] of
Maybe (Expr Double)
Nothing -> [Char] -> Either [Char] (Expr Double)
forall a b. a -> Either a b
Left [Char]
"No programs found"
Just Expr Double
p -> Expr Double -> Either [Char] (Expr Double)
forall a b. b -> Either a b
Right Expr Double
p
fitRegression ::
T.Text ->
Int ->
Int ->
DataFrame ->
Either String (Expr Double)
fitRegression :: Text -> Int -> Int -> DataFrame -> Either [Char] (Expr Double)
fitRegression Text
target Int
d Int
b DataFrame
df =
let
df' :: DataFrame
df' = [Text] -> DataFrame -> DataFrame
exclude [Text
target] DataFrame
df
targetMean :: Double
targetMean = Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
0 (Maybe Double -> Double) -> Maybe Double -> Double
forall a b. (a -> b) -> a -> b
$ Text -> DataFrame -> Maybe Double
Stats.mean Text
target DataFrame
df
t :: TypedColumn Double
t = case DataFrame
-> Expr Double -> Either DataFrameException (TypedColumn Double)
forall a.
Columnable a =>
DataFrame -> Expr a -> Either DataFrameException (TypedColumn a)
interpret DataFrame
df (Text -> Expr Double
forall a. Columnable a => Text -> Expr a
Col Text
target) of
Left DataFrameException
e -> DataFrameException -> TypedColumn Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right TypedColumn Double
v -> TypedColumn Double
v
in
case DataFrame
-> BeamConfig
-> TypedColumn Double
-> [Expr Double]
-> Maybe (Expr Double)
beamSearch
DataFrame
df'
( Int
-> Int
-> (Vector Double -> Vector Double -> Maybe Double)
-> BeamConfig
BeamConfig
Int
d
Int
b
( \Vector Double
l Vector Double
r ->
Int -> Vector Double -> Vector Double -> Maybe Double
mutualInformationBinned
(Int -> Int -> Int
forall a. Ord a => a -> a -> a
Prelude.max Int
10 (Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
ceiling (Double -> Double
forall a. Floating a => a -> a
sqrt (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector Double -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Double
l)))))
Vector Double
l
Vector Double
r
)
)
TypedColumn Double
t
[] of
Maybe (Expr Double)
Nothing -> [Char] -> Either [Char] (Expr Double)
forall a b. a -> Either a b
Left [Char]
"No programs found"
Just Expr Double
p ->
[Char]
-> Either [Char] (Expr Double) -> Either [Char] (Expr Double)
forall a. [Char] -> a -> a
trace (Expr Double -> [Char]
forall a. Show a => a -> [Char]
show Expr Double
p) (Either [Char] (Expr Double) -> Either [Char] (Expr Double))
-> Either [Char] (Expr Double) -> Either [Char] (Expr Double)
forall a b. (a -> b) -> a -> b
$
let
in case DataFrame
-> BeamConfig
-> TypedColumn Double
-> [Expr Double]
-> Maybe (Expr Double)
beamSearch
( Text -> Expr Double -> DataFrame -> DataFrame
forall a. Columnable a => Text -> Expr a -> DataFrame -> DataFrame
D.derive Text
"_generated_regression_feature_" Expr Double
p DataFrame
df
DataFrame -> (DataFrame -> DataFrame) -> DataFrame
forall a b. a -> (a -> b) -> b
& [Text] -> DataFrame -> DataFrame
select [Text
"_generated_regression_feature_"]
)
(Int
-> Int
-> (Vector Double -> Vector Double -> Maybe Double)
-> BeamConfig
BeamConfig Int
d Int
b (\Vector Double
l Vector Double
r -> (Double -> Double) -> Maybe Double -> Maybe Double
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Double -> Double
forall a. Num a => a -> a
negate (Vector Double -> Vector Double -> Maybe Double
meanSquaredError Vector Double
l Vector Double
r)))
TypedColumn Double
t
[Text -> Expr Double
forall a. Columnable a => Text -> Expr a
Col Text
"_generated_regression_feature_", Double -> Expr Double
forall a. Columnable a => a -> Expr a
lit Double
targetMean, Double -> Expr Double
forall a. Columnable a => a -> Expr a
lit Double
10] of
Maybe (Expr Double)
Nothing -> [Char] -> Either [Char] (Expr Double)
forall a b. a -> Either a b
Left [Char]
"Could not find coefficients"
Just Expr Double
p' -> Expr Double -> Either [Char] (Expr Double)
forall a b. b -> Either a b
Right (Expr Double -> Expr Double -> Expr Double -> Expr Double
forall a b c.
(Columnable a, Columnable b, Columnable c) =>
Expr a -> Expr b -> Expr c -> Expr c
replaceExpr Expr Double
p (forall a. Columnable a => Text -> Expr a
Col @Double Text
"_generated_regression_feature_") Expr Double
p')
data BeamConfig = BeamConfig
{ BeamConfig -> Int
searchDepth :: Int
, BeamConfig -> Int
beamLength :: Int
, BeamConfig -> Vector Double -> Vector Double -> Maybe Double
rankingFunction :: VU.Vector Double -> VU.Vector Double -> Maybe Double
}
beamSearch ::
DataFrame ->
BeamConfig ->
TypedColumn Double ->
[Expr Double] ->
Maybe (Expr Double)
beamSearch :: DataFrame
-> BeamConfig
-> TypedColumn Double
-> [Expr Double]
-> Maybe (Expr Double)
beamSearch DataFrame
df BeamConfig
cfg TypedColumn Double
outputs [Expr Double]
programs
| BeamConfig -> Int
searchDepth BeamConfig
cfg Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
Prelude.== Int
0 = case [Expr Double]
ps of
[] -> Maybe (Expr Double)
forall a. Maybe a
Nothing
(Expr Double
x : [Expr Double]
_) -> Expr Double -> Maybe (Expr Double)
forall a. a -> Maybe a
Just Expr Double
x
| Bool
otherwise =
DataFrame
-> BeamConfig
-> TypedColumn Double
-> [Expr Double]
-> Maybe (Expr Double)
beamSearch
DataFrame
df
(BeamConfig
cfg{searchDepth = searchDepth cfg - 1})
TypedColumn Double
outputs
([Expr Double] -> [Expr Double] -> [Expr Double]
generatePrograms ((Text -> Expr Double) -> [Text] -> [Expr Double]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Expr Double
forall a. Columnable a => Text -> Expr a
col [Text]
names) [Expr Double]
ps)
where
ps :: [Expr Double]
ps = DataFrame
-> TypedColumn Double
-> BeamConfig
-> [(Expr Double, TypedColumn Double)]
-> [Expr Double]
forall a.
DataFrame
-> TypedColumn Double
-> BeamConfig
-> [(Expr Double, TypedColumn a)]
-> [Expr Double]
pickTopN DataFrame
df TypedColumn Double
outputs BeamConfig
cfg ([(Expr Double, TypedColumn Double)] -> [Expr Double])
-> [(Expr Double, TypedColumn Double)] -> [Expr Double]
forall a b. (a -> b) -> a -> b
$ DataFrame -> [Expr Double] -> [(Expr Double, TypedColumn Double)]
deduplicate DataFrame
df [Expr Double]
programs
names :: [Text]
names = (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)])
-> (DataFrame -> Map Text Int) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> Map Text Int
columnIndices) DataFrame
df
pickTopN ::
DataFrame ->
TypedColumn Double ->
BeamConfig ->
[(Expr Double, TypedColumn a)] ->
[Expr Double]
pickTopN :: forall a.
DataFrame
-> TypedColumn Double
-> BeamConfig
-> [(Expr Double, TypedColumn a)]
-> [Expr Double]
pickTopN DataFrame
_ TypedColumn Double
_ BeamConfig
_ [] = []
pickTopN DataFrame
df (TColumn Column
col) BeamConfig
cfg [(Expr Double, TypedColumn a)]
ps =
let
l :: Vector Double
l = case forall a (v :: * -> *).
(Vector v a, Columnable a) =>
Column -> Either DataFrameException (v a)
toVector @Double @VU.Vector Column
col of
Left DataFrameException
e -> DataFrameException -> Vector Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right Vector Double
v -> Vector Double
v
ordered :: [Expr Double]
ordered =
Int -> [Expr Double] -> [Expr Double]
forall a. Int -> [a] -> [a]
Prelude.take
(BeamConfig -> Int
beamLength BeamConfig
cfg)
( ((Expr Double, Maybe Double) -> Expr Double)
-> [(Expr Double, Maybe Double)] -> [Expr Double]
forall a b. (a -> b) -> [a] -> [b]
map (Expr Double, Maybe Double) -> Expr Double
forall a b. (a, b) -> a
fst ([(Expr Double, Maybe Double)] -> [Expr Double])
-> [(Expr Double, Maybe Double)] -> [Expr Double]
forall a b. (a -> b) -> a -> b
$
((Expr Double, Maybe Double)
-> (Expr Double, Maybe Double) -> Ordering)
-> [(Expr Double, Maybe Double)] -> [(Expr Double, Maybe Double)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy
( \(Expr Double
_, Maybe Double
c2) (Expr Double
_, Maybe Double
c1) ->
if Bool -> (Double -> Bool) -> Maybe Double -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False Double -> Bool
forall a. RealFloat a => a -> Bool
isInfinite Maybe Double
c1
Bool -> Bool -> Bool
|| Bool -> (Double -> Bool) -> Maybe Double -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False Double -> Bool
forall a. RealFloat a => a -> Bool
isInfinite Maybe Double
c2
Bool -> Bool -> Bool
|| Bool -> (Double -> Bool) -> Maybe Double -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False Double -> Bool
forall a. RealFloat a => a -> Bool
isNaN Maybe Double
c1
Bool -> Bool -> Bool
|| Bool -> (Double -> Bool) -> Maybe Double -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False Double -> Bool
forall a. RealFloat a => a -> Bool
isNaN Maybe Double
c2
then Ordering
LT
else Maybe Double -> Maybe Double -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Maybe Double
c1 Maybe Double
c2
)
(((Expr Double, TypedColumn a) -> (Expr Double, Maybe Double))
-> [(Expr Double, TypedColumn a)] -> [(Expr Double, Maybe Double)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Expr Double
e, TypedColumn a
res) -> (Expr Double
e, BeamConfig -> Vector Double -> Vector Double -> Maybe Double
rankingFunction BeamConfig
cfg Vector Double
l (TypedColumn a -> Vector Double
forall {w :: * -> *} {a}.
Vector w Double =>
TypedColumn a -> w Double
asDoubleVector TypedColumn a
res))) [(Expr Double, TypedColumn a)]
ps)
)
asDoubleVector :: TypedColumn a -> w Double
asDoubleVector TypedColumn a
c =
let
(TColumn Column
col') = TypedColumn a
c
in
case forall a (v :: * -> *).
(Vector v a, Columnable a) =>
Column -> Either DataFrameException (v a)
toVector @Double @VU.Vector Column
col' of
Left DataFrameException
e -> DataFrameException -> w Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right Vector Double
v -> Vector Double -> w Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert Vector Double
v
interpretDoubleVector :: Expr Double -> Vector Double
interpretDoubleVector Expr Double
e =
let
(TColumn Column
col') = case DataFrame
-> Expr Double -> Either DataFrameException (TypedColumn Double)
forall a.
Columnable a =>
DataFrame -> Expr a -> Either DataFrameException (TypedColumn a)
interpret DataFrame
df Expr Double
e of
Left DataFrameException
e -> DataFrameException -> TypedColumn Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right TypedColumn Double
v -> TypedColumn Double
v
in
case forall a (v :: * -> *).
(Vector v a, Columnable a) =>
Column -> Either DataFrameException (v a)
toVector @Double @VU.Vector Column
col' of
Left DataFrameException
e -> DataFrameException -> Vector Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right Vector Double
v -> Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert Vector Double
v
in
[Char] -> [Expr Double] -> [Expr Double]
forall a. [Char] -> a -> a
trace
( [Char]
"Best loss: "
[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Maybe (Maybe Double) -> [Char]
forall a. Show a => a -> [Char]
show
(BeamConfig -> Vector Double -> Vector Double -> Maybe Double
rankingFunction BeamConfig
cfg Vector Double
l (Vector Double -> Maybe Double)
-> Maybe (Vector Double) -> Maybe (Maybe Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Expr Double -> Vector Double
interpretDoubleVector (Expr Double -> Vector Double)
-> Maybe (Expr Double) -> Maybe (Vector Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ([Expr Double] -> Maybe (Expr Double)
forall a. [a] -> Maybe a
listToMaybe [Expr Double]
ordered)))
[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" "
[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ (if [Expr Double] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Expr Double]
ordered then [Char]
"empty" else Maybe (Expr Double) -> [Char]
forall a. Show a => a -> [Char]
show ([Expr Double] -> Maybe (Expr Double)
forall a. [a] -> Maybe a
listToMaybe [Expr Double]
ordered))
)
[Expr Double]
ordered
satisfiesExamples :: DataFrame -> TypedColumn Double -> Expr Double -> Bool
satisfiesExamples :: DataFrame -> TypedColumn Double -> Expr Double -> Bool
satisfiesExamples DataFrame
df TypedColumn Double
col Expr Double
expr =
let
result :: TypedColumn Double
result = case DataFrame
-> Expr Double -> Either DataFrameException (TypedColumn Double)
forall a.
Columnable a =>
DataFrame -> Expr a -> Either DataFrameException (TypedColumn a)
interpret DataFrame
df Expr Double
expr of
Left DataFrameException
e -> DataFrameException -> TypedColumn Double
forall a e. Exception e => e -> a
throw DataFrameException
e
Right TypedColumn Double
v -> TypedColumn Double
v
in
TypedColumn Double
result TypedColumn Double -> TypedColumn Double -> Bool
forall a. Eq a => a -> a -> Bool
Prelude.== TypedColumn Double
col
isReservedId :: T.Text -> Bool
isReservedId :: Text -> Bool
isReservedId Text
t = case Text
t of
Text
"case" -> Bool
True
Text
"class" -> Bool
True
Text
"data" -> Bool
True
Text
"default" -> Bool
True
Text
"deriving" -> Bool
True
Text
"do" -> Bool
True
Text
"else" -> Bool
True
Text
"foreign" -> Bool
True
Text
"if" -> Bool
True
Text
"import" -> Bool
True
Text
"in" -> Bool
True
Text
"infix" -> Bool
True
Text
"infixl" -> Bool
True
Text
"infixr" -> Bool
True
Text
"instance" -> Bool
True
Text
"let" -> Bool
True
Text
"module" -> Bool
True
Text
"newtype" -> Bool
True
Text
"of" -> Bool
True
Text
"then" -> Bool
True
Text
"type" -> Bool
True
Text
"where" -> Bool
True
Text
_ -> Bool
False
isVarId :: T.Text -> Bool
isVarId :: Text -> Bool
isVarId Text
t = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (Char
c, Text
_) -> Char -> Bool
Char.isLower Char
c Bool -> Bool -> Bool
&& Char -> Bool
Char.isAlpha Char
c
Maybe (Char, Text)
Nothing -> Bool
False
isHaskellIdentifier :: T.Text -> Bool
isHaskellIdentifier :: Text -> Bool
isHaskellIdentifier Text
t = Bool -> Bool
Prelude.not (Text -> Bool
isVarId Text
t) Bool -> Bool -> Bool
|| Text -> Bool
isReservedId Text
t
sanitize :: T.Text -> T.Text
sanitize :: Text -> Text
sanitize Text
t
| Bool
isValid = Text
t
| Text -> Bool
isHaskellIdentifier Text
t' = Text
"_" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t' Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"_"
| Bool
otherwise = Text
t'
where
isValid :: Bool
isValid =
Bool -> Bool
Prelude.not (Text -> Bool
isHaskellIdentifier Text
t)
Bool -> Bool -> Bool
&& Text -> Bool
isVarId Text
t
Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
Char.isAlphaNum Text
t
t' :: Text
t' = (Char -> Char) -> Text -> Text
T.map Char -> Char
replaceInvalidCharacters (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter (Bool -> Bool
Prelude.not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
parentheses) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
t
replaceInvalidCharacters :: Char -> Char
replaceInvalidCharacters Char
c
| Char -> Bool
Char.isUpper Char
c = Char -> Char
Char.toLower Char
c
| Char -> Bool
Char.isSpace Char
c = Char
'_'
| Char -> Bool
Char.isPunctuation Char
c = Char
'_'
| Char -> Bool
Char.isSymbol Char
c = Char
'_'
| Char -> Bool
Char.isAlphaNum Char
c = Char
c
| Bool
otherwise = Char
'_'
parentheses :: Char -> Bool
parentheses Char
c = case Char
c of
Char
'(' -> Bool
True
Char
')' -> Bool
True
Char
'{' -> Bool
True
Char
'}' -> Bool
True
Char
'[' -> Bool
True
Char
']' -> Bool
True
Char
_ -> Bool
False
typeFromString :: [String] -> Q Type
typeFromString :: [[Char]] -> Q Type
typeFromString [] = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"No type specified"
typeFromString [[Char]
t] = do
Maybe Name
maybeType <- [Char] -> Q (Maybe Name)
lookupTypeName [Char]
t
case Maybe Name
maybeType of
Just Name
name -> Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Name -> Type
ConT Name
name)
Maybe Name
Nothing -> [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
t
typeFromString [[Char]
tycon, [Char]
t1] = do
Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
Type
inner <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT Type
outer Type
inner)
typeFromString [[Char]
tycon, [Char]
t1, [Char]
t2] = do
Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
Type
lhs <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
Type
rhs <- [[Char]] -> Q Type
typeFromString [[Char]
t2]
Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT (Type -> Type -> Type
AppT Type
outer Type
lhs) Type
rhs)
typeFromString [[Char]]
s = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [[Char]] -> [Char]
unwords [[Char]]
s
declareColumns :: DataFrame -> DecsQ
declareColumns :: DataFrame -> DecsQ
declareColumns DataFrame
df =
let
names :: [Text]
names = (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)])
-> (DataFrame -> Map Text Int) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> Map Text Int
columnIndices) DataFrame
df
types :: [[Char]]
types = (Text -> [Char]) -> [Text] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (Column -> [Char]
columnTypeString (Column -> [Char]) -> (Text -> Column) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> DataFrame -> Column
`unsafeGetColumn` DataFrame
df)) [Text]
names
specs :: [(Text, Text, [Char])]
specs = (Text -> [Char] -> (Text, Text, [Char]))
-> [Text] -> [[Char]] -> [(Text, Text, [Char])]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Text
name [Char]
type_ -> (Text
name, Text -> Text
sanitize Text
name, [Char]
type_)) [Text]
names [[Char]]
types
in
([[Dec]] -> [Dec]) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> Q a -> Q b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Dec]] -> [Dec]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Q [[Dec]] -> DecsQ) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> a -> b
$ [(Text, Text, [Char])]
-> ((Text, Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(Text, Text, [Char])]
specs (((Text, Text, [Char]) -> DecsQ) -> Q [[Dec]])
-> ((Text, Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall a b. (a -> b) -> a -> b
$ \(Text
raw, Text
nm, [Char]
tyStr) -> do
Type
ty <- [[Char]] -> Q Type
typeFromString ([Char] -> [[Char]]
words [Char]
tyStr)
Text -> Q () -> Q ()
forall a b. Show a => a -> b -> b
traceShow (Text
nm Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" :: Expr " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
tyStr) (() -> Q ()
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())
let n :: Name
n = [Char] -> Name
mkName (Text -> [Char]
T.unpack Text
nm)
Dec
sig <- Name -> Q Type -> Q Dec
forall (m :: * -> *). Quote m => Name -> m Type -> m Dec
sigD Name
n [t|Expr $(Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Type
ty)|]
Dec
val <- Q Pat -> Q Body -> [Q Dec] -> Q Dec
forall (m :: * -> *).
Quote m =>
m Pat -> m Body -> [m Dec] -> m Dec
valD (Name -> Q Pat
forall (m :: * -> *). Quote m => Name -> m Pat
varP Name
n) (Q Exp -> Q Body
forall (m :: * -> *). Quote m => m Exp -> m Body
normalB [|col $(Text -> Q Exp
forall t (m :: * -> *). (Lift t, Quote m) => t -> m Exp
forall (m :: * -> *). Quote m => Text -> m Exp
TH.lift Text
raw)|]) []
[Dec] -> DecsQ
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Dec
sig, Dec
val]