{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
module DataFrame.Functions where
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame (..), unsafeGetColumn)
import DataFrame.Internal.Expression (Expr (..), UExpr (..))
import Control.Monad
import qualified Data.Char as Char
import Data.Function
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as VB
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import Debug.Trace (traceShow)
import Language.Haskell.TH
import qualified Language.Haskell.TH.Syntax as TH
import Type.Reflection (typeRep)
col :: (Columnable a) => T.Text -> Expr a
col :: forall a. Columnable a => Text -> Expr a
col = Text -> Expr a
forall a. Columnable a => Text -> Expr a
Col
as :: (Columnable a) => Expr a -> T.Text -> (T.Text, UExpr)
as :: forall a. Columnable a => Expr a -> Text -> (Text, UExpr)
as Expr a
expr Text
name = (Text
name, Expr a -> UExpr
forall a. Columnable a => Expr a -> UExpr
Wrap Expr a
expr)
lit :: (Columnable a) => a -> Expr a
lit :: forall a. Columnable a => a -> Expr a
lit = a -> Expr a
forall a. Columnable a => a -> Expr a
Lit
lift :: (Columnable a, Columnable b) => (a -> b) -> Expr a -> Expr b
lift :: forall a b.
(Columnable a, Columnable b) =>
(a -> b) -> Expr a -> Expr b
lift = Text -> (a -> b) -> Expr a -> Expr b
forall a b.
(Columnable a, Columnable b) =>
Text -> (b -> a) -> Expr b -> Expr a
Apply Text
"udf"
lift2 :: (Columnable c, Columnable b, Columnable a) => (c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 :: forall c b a.
(Columnable c, Columnable b, Columnable a) =>
(c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 = Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"udf"
eq :: (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq :: forall a. (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"eq" a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(==)
lt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"lt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(<)
gt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"gt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(>)
leq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
leq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
leq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"leq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(<=)
geq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
geq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
geq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"geq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(>=)
count :: (Columnable a) => Expr a -> Expr Int
count :: forall a. Columnable a => Expr a -> Expr Int
count (Col Text
name) = Text
-> Text
-> (forall (v :: * -> *) b.
(Vector v b, Columnable b) =>
v b -> Int)
-> Expr Int
forall a.
Columnable a =>
Text
-> Text
-> (forall (v :: * -> *) b. (Vector v b, Columnable b) => v b -> a)
-> Expr a
GeneralAggregate Text
name Text
"count" v b -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
forall (v :: * -> *) b. (Vector v b, Columnable b) => v b -> Int
VG.length
count Expr a
_ = [Char] -> Expr Int
forall a. HasCallStack => [Char] -> a
error [Char]
"Argument can only be a column reference not an unevaluated expression"
minimum :: (Columnable a) => Expr a -> Expr a
minimum :: forall a. Columnable a => Expr a -> Expr a
minimum (Col Text
name) = Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
ReductionAggregate Text
name Text
"minimum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
min
maximum :: (Columnable a) => Expr a -> Expr a
maximum :: forall a. Columnable a => Expr a -> Expr a
maximum (Col Text
name) = Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
ReductionAggregate Text
name Text
"maximum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
max
sum :: forall a. (Columnable a, Num a, VU.Unbox a) => Expr a -> Expr a
sum :: forall a. (Columnable a, Num a, Unbox a) => Expr a -> Expr a
sum (Col Text
name) = Text -> Text -> (Vector a -> a) -> Expr a
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Text -> Text -> (Vector b -> a) -> Expr a
NumericAggregate Text
name Text
"sum" Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum
mean :: (Columnable a, Num a, VU.Unbox a) => Expr a -> Expr Double
mean :: forall a. (Columnable a, Num a, Unbox a) => Expr a -> Expr Double
mean (Col Text
name) =
let
mean' :: v Double -> Double
mean' v Double
samp =
let
(!Double
total, !Int
n) = ((Double, Int) -> Double -> (Double, Int))
-> (Double, Int) -> v Double -> (Double, Int)
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
VG.foldl' (\(!Double
total, !Int
n) Double
v -> (Double
total Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
v, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) (Double
0 :: Double, Int
0 :: Int) v Double
samp
in
Double
total Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
in
Text -> Text -> (Vector Double -> Double) -> Expr Double
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Text -> Text -> (Vector b -> a) -> Expr a
NumericAggregate Text
name Text
"mean" Vector Double -> Double
forall {v :: * -> *}. Vector v Double => v Double -> Double
mean'
isReservedId :: T.Text -> Bool
isReservedId :: Text -> Bool
isReservedId Text
t = case Text
t of
Text
"case" -> Bool
True
Text
"class" -> Bool
True
Text
"data" -> Bool
True
Text
"default" -> Bool
True
Text
"deriving" -> Bool
True
Text
"do" -> Bool
True
Text
"else" -> Bool
True
Text
"foreign" -> Bool
True
Text
"if" -> Bool
True
Text
"import" -> Bool
True
Text
"in" -> Bool
True
Text
"infix" -> Bool
True
Text
"infixl" -> Bool
True
Text
"infixr" -> Bool
True
Text
"instance" -> Bool
True
Text
"let" -> Bool
True
Text
"module" -> Bool
True
Text
"newtype" -> Bool
True
Text
"of" -> Bool
True
Text
"then" -> Bool
True
Text
"type" -> Bool
True
Text
"where" -> Bool
True
Text
_ -> Bool
False
isVarId :: T.Text -> Bool
isVarId :: Text -> Bool
isVarId Text
t = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (Char
c, Text
_) -> Char -> Bool
Char.isLower Char
c Bool -> Bool -> Bool
&& Char -> Bool
Char.isAlpha Char
c
Maybe (Char, Text)
Nothing -> Bool
False
isHaskellIdentifier :: T.Text -> Bool
isHaskellIdentifier :: Text -> Bool
isHaskellIdentifier Text
t = Bool -> Bool
not (Text -> Bool
isVarId Text
t) Bool -> Bool -> Bool
|| Text -> Bool
isReservedId Text
t
sanitize :: T.Text -> T.Text
sanitize :: Text -> Text
sanitize Text
t
| Bool
isValid = Text
t
| Text -> Bool
isHaskellIdentifier Text
t' = Text
"_" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t' Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"_"
| Bool
otherwise = Text
t'
where
isValid :: Bool
isValid =
Bool -> Bool
not (Text -> Bool
isHaskellIdentifier Text
t)
Bool -> Bool -> Bool
&& Text -> Bool
isVarId Text
t
Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
Char.isAlphaNum Text
t
t' :: Text
t' = (Char -> Char) -> Text -> Text
T.map Char -> Char
replaceInvalidCharacters (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
parentheses) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
t
replaceInvalidCharacters :: Char -> Char
replaceInvalidCharacters Char
c
| Char -> Bool
Char.isUpper Char
c = Char -> Char
Char.toLower Char
c
| Char -> Bool
Char.isSpace Char
c = Char
'_'
| Char -> Bool
Char.isPunctuation Char
c = Char
'_'
| Char -> Bool
Char.isSymbol Char
c = Char
'_'
| Char -> Bool
Char.isAlphaNum Char
c = Char
c
| Bool
otherwise = Char
'_'
parentheses :: Char -> Bool
parentheses Char
c = case Char
c of
Char
'(' -> Bool
True
Char
')' -> Bool
True
Char
'{' -> Bool
True
Char
'}' -> Bool
True
Char
'[' -> Bool
True
Char
']' -> Bool
True
Char
_ -> Bool
False
typeFromString :: [String] -> Q Type
typeFromString :: [[Char]] -> Q Type
typeFromString [] = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"No type specified"
typeFromString [[Char]
t] = do
Maybe Name
maybeType <- [Char] -> Q (Maybe Name)
lookupTypeName [Char]
t
case Maybe Name
maybeType of
Just Name
name -> Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Name -> Type
ConT Name
name)
Maybe Name
Nothing -> [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
t
typeFromString [[Char]
tycon, [Char]
t1] = do
Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
Type
inner <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT Type
outer Type
inner)
typeFromString [[Char]
tycon, [Char]
t1, [Char]
t2] = do
Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
Type
lhs <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
Type
rhs <- [[Char]] -> Q Type
typeFromString [[Char]
t2]
Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT (Type -> Type -> Type
AppT Type
outer Type
lhs) Type
rhs)
typeFromString [[Char]]
s = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([[Char]] -> [Char]
unwords [[Char]]
s)
declareColumns :: DataFrame -> DecsQ
declareColumns :: DataFrame -> DecsQ
declareColumns DataFrame
df =
let
names :: [Text]
names = (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)])
-> (DataFrame -> Map Text Int) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> Map Text Int
columnIndices) DataFrame
df
types :: [[Char]]
types = (Text -> [Char]) -> [Text] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (Column -> [Char]
columnTypeString (Column -> [Char]) -> (Text -> Column) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> DataFrame -> Column
`unsafeGetColumn` DataFrame
df)) [Text]
names
specs :: [(Text, [Char])]
specs = (Text -> [Char] -> (Text, [Char]))
-> [Text] -> [[Char]] -> [(Text, [Char])]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Text
name [Char]
type_ -> (Text -> Text
sanitize Text
name, [Char]
type_)) [Text]
names [[Char]]
types
in
([[Dec]] -> [Dec]) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> Q a -> Q b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Dec]] -> [Dec]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Q [[Dec]] -> DecsQ) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> a -> b
$ [(Text, [Char])] -> ((Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(Text, [Char])]
specs (((Text, [Char]) -> DecsQ) -> Q [[Dec]])
-> ((Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall a b. (a -> b) -> a -> b
$ \(Text
nm, [Char]
tyStr) -> do
Text -> Q () -> Q ()
forall a b. Show a => a -> b -> b
traceShow Text
nm (() -> Q ()
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())
Type
ty <- [[Char]] -> Q Type
typeFromString ([Char] -> [[Char]]
words [Char]
tyStr)
let n :: Name
n = [Char] -> Name
mkName (Text -> [Char]
T.unpack Text
nm)
Dec
sig <- Name -> Q Type -> Q Dec
forall (m :: * -> *). Quote m => Name -> m Type -> m Dec
sigD Name
n [t|Expr $(Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Type
ty)|]
Dec
val <- Q Pat -> Q Body -> [Q Dec] -> Q Dec
forall (m :: * -> *).
Quote m =>
m Pat -> m Body -> [m Dec] -> m Dec
valD (Name -> Q Pat
forall (m :: * -> *). Quote m => Name -> m Pat
varP Name
n) (Q Exp -> Q Body
forall (m :: * -> *). Quote m => m Exp -> m Body
normalB [|col $(Text -> Q Exp
forall t (m :: * -> *). (Lift t, Quote m) => t -> m Exp
forall (m :: * -> *). Quote m => Text -> m Exp
TH.lift Text
nm)|]) []
[Dec] -> DecsQ
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Dec
sig, Dec
val]