{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
module DataFrame.Functions where
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame(..), unsafeGetColumn)
import DataFrame.Internal.Expression (Expr(..), UExpr(..))
import Control.Monad
import Data.Function
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector as VB
import Language.Haskell.TH
import qualified Language.Haskell.TH.Syntax as TH
import qualified Data.Char as Char
import Debug.Trace (traceShow)
import Type.Reflection (typeRep)
col :: Columnable a => T.Text -> Expr a
col :: forall a. Columnable a => Text -> Expr a
col = Text -> Expr a
forall a. Columnable a => Text -> Expr a
Col
as :: Columnable a => Expr a -> T.Text -> (T.Text, UExpr)
as :: forall a. Columnable a => Expr a -> Text -> (Text, UExpr)
as Expr a
expr Text
name = (Text
name, Expr a -> UExpr
forall a. Columnable a => Expr a -> UExpr
Wrap Expr a
expr)
lit :: Columnable a => a -> Expr a
lit :: forall a. Columnable a => a -> Expr a
lit = a -> Expr a
forall a. Columnable a => a -> Expr a
Lit
lift :: (Columnable a, Columnable b) => (a -> b) -> Expr a -> Expr b
lift :: forall a b.
(Columnable a, Columnable b) =>
(a -> b) -> Expr a -> Expr b
lift = Text -> (a -> b) -> Expr a -> Expr b
forall a b.
(Columnable a, Columnable b) =>
Text -> (b -> a) -> Expr b -> Expr a
Apply Text
"udf"
lift2 :: (Columnable c, Columnable b, Columnable a) => (c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 :: forall c b a.
(Columnable c, Columnable b, Columnable a) =>
(c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 = Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"udf"
eq :: (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq :: forall a. (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"eq" a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(==)
lt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"lt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(<)
gt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"gt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(>)
leq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
leq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
leq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"leq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(<=)
geq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
geq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
geq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"geq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(>=)
count :: Columnable a => Expr a -> Expr Int
count :: forall a. Columnable a => Expr a -> Expr Int
count (Col Text
name) = Text
-> Text
-> (forall (v :: * -> *) b.
(Vector v b, Columnable b) =>
v b -> Int)
-> Expr Int
forall a.
Columnable a =>
Text
-> Text
-> (forall (v :: * -> *) b. (Vector v b, Columnable b) => v b -> a)
-> Expr a
GeneralAggregate Text
name Text
"count" v b -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
forall (v :: * -> *) b. (Vector v b, Columnable b) => v b -> Int
VG.length
count Expr a
_ = [Char] -> Expr Int
forall a. HasCallStack => [Char] -> a
error [Char]
"Argument can only be a column reference not an unevaluated expression"
minimum :: Columnable a => Expr a -> Expr a
minimum :: forall a. Columnable a => Expr a -> Expr a
minimum (Col Text
name) = Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
ReductionAggregate Text
name Text
"minimum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
min
maximum :: Columnable a => Expr a -> Expr a
maximum :: forall a. Columnable a => Expr a -> Expr a
maximum (Col Text
name) = Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
ReductionAggregate Text
name Text
"maximum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
max
sum :: forall a . (Columnable a, Num a, VU.Unbox a) => Expr a -> Expr a
sum :: forall a. (Columnable a, Num a, Unbox a) => Expr a -> Expr a
sum (Col Text
name) = Text -> Text -> (Vector a -> a) -> Expr a
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Text -> Text -> (Vector b -> a) -> Expr a
NumericAggregate Text
name Text
"sum" Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum
mean :: (Columnable a, Num a, VU.Unbox a) => Expr a -> Expr Double
mean :: forall a. (Columnable a, Num a, Unbox a) => Expr a -> Expr Double
mean (Col Text
name) = let
mean' :: v Double -> Double
mean' v Double
samp = let
(!Double
total, !Int
n) = ((Double, Int) -> Double -> (Double, Int))
-> (Double, Int) -> v Double -> (Double, Int)
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
VG.foldl' (\(!Double
total, !Int
n) Double
v -> (Double
total Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
v, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) (Double
0 :: Double, Int
0 :: Int) v Double
samp
in Double
total Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
in Text -> Text -> (Vector Double -> Double) -> Expr Double
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Text -> Text -> (Vector b -> a) -> Expr a
NumericAggregate Text
name Text
"mean" Vector Double -> Double
forall {v :: * -> *}. Vector v Double => v Double -> Double
mean'
isReservedId :: T.Text -> Bool
isReservedId :: Text -> Bool
isReservedId Text
t = case Text
t of
Text
"case" -> Bool
True
Text
"class" -> Bool
True
Text
"data" -> Bool
True
Text
"default" -> Bool
True
Text
"deriving" -> Bool
True
Text
"do" -> Bool
True
Text
"else" -> Bool
True
Text
"foreign" -> Bool
True
Text
"if" -> Bool
True
Text
"import" -> Bool
True
Text
"in" -> Bool
True
Text
"infix" -> Bool
True
Text
"infixl" -> Bool
True
Text
"infixr" -> Bool
True
Text
"instance" -> Bool
True
Text
"let" -> Bool
True
Text
"module" -> Bool
True
Text
"newtype" -> Bool
True
Text
"of" -> Bool
True
Text
"then" -> Bool
True
Text
"type" -> Bool
True
Text
"where" -> Bool
True
Text
_ -> Bool
False
isVarId :: T.Text -> Bool
isVarId :: Text -> Bool
isVarId Text
t = case Text -> Maybe (Char, Text)
T.uncons Text
t of
Just (Char
c, Text
_) -> Char -> Bool
Char.isLower Char
c Bool -> Bool -> Bool
&& Char -> Bool
Char.isAlpha Char
c
Maybe (Char, Text)
Nothing -> Bool
False
isHaskellIdentifier :: T.Text -> Bool
isHaskellIdentifier :: Text -> Bool
isHaskellIdentifier Text
t = Bool -> Bool
not (Text -> Bool
isVarId Text
t) Bool -> Bool -> Bool
|| Text -> Bool
isReservedId Text
t
sanitize :: T.Text -> T.Text
sanitize :: Text -> Text
sanitize Text
t
| Bool
isValid = Text
t
| Text -> Bool
isHaskellIdentifier Text
t' = Text
"_" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t' Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"_"
| Bool
otherwise = Text
t'
where
isValid :: Bool
isValid
= Bool -> Bool
not (Text -> Bool
isHaskellIdentifier Text
t)
Bool -> Bool -> Bool
&& Text -> Bool
isVarId Text
t
Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
Char.isAlphaNum Text
t
t' :: Text
t' = (Char -> Char) -> Text -> Text
T.map Char -> Char
replaceInvalidCharacters (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
parentheses) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
t
replaceInvalidCharacters :: Char -> Char
replaceInvalidCharacters Char
c
| Char -> Bool
Char.isUpper Char
c = Char -> Char
Char.toLower Char
c
| Char -> Bool
Char.isSpace Char
c = Char
'_'
| Char -> Bool
Char.isPunctuation Char
c = Char
'_'
| Char -> Bool
Char.isSymbol Char
c = Char
'_'
| Char -> Bool
Char.isAlphaNum Char
c = Char
c
| Bool
otherwise = Char
'_'
parentheses :: Char -> Bool
parentheses Char
c = case Char
c of
Char
'(' -> Bool
True
Char
')' -> Bool
True
Char
'{' -> Bool
True
Char
'}' -> Bool
True
Char
'[' -> Bool
True
Char
']' -> Bool
True
Char
_ -> Bool
False
typeFromString :: [String] -> Q Type
typeFromString :: [[Char]] -> Q Type
typeFromString [] = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"No type specified"
typeFromString [[Char]
t] = do
Maybe Name
maybeType <- [Char] -> Q (Maybe Name)
lookupTypeName [Char]
t
case Maybe Name
maybeType of
Just Name
name -> Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Name -> Type
ConT Name
name)
Maybe Name
Nothing -> [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
t
typeFromString [[Char]
tycon,[Char]
t1] = do
Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
Type
inner <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT Type
outer Type
inner)
typeFromString [[Char]
tycon,[Char]
t1,[Char]
t2] = do
Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
Type
lhs <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
Type
rhs <- [[Char]] -> Q Type
typeFromString [[Char]
t2]
Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT (Type -> Type -> Type
AppT Type
outer Type
lhs) Type
rhs)
typeFromString [[Char]]
s = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([[Char]] -> [Char]
unwords [[Char]]
s)
declareColumns :: DataFrame -> DecsQ
declareColumns :: DataFrame -> DecsQ
declareColumns DataFrame
df = let
names :: [Text]
names = (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd)([(Text, Int)] -> [(Text, Int)])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)])
-> (DataFrame -> Map Text Int) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> Map Text Int
columnIndices) DataFrame
df
types :: [[Char]]
types = (Text -> [Char]) -> [Text] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (Column -> [Char]
columnTypeString (Column -> [Char]) -> (Text -> Column) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> DataFrame -> Column
`unsafeGetColumn` DataFrame
df)) [Text]
names
specs :: [(Text, [Char])]
specs = (Text -> [Char] -> (Text, [Char]))
-> [Text] -> [[Char]] -> [(Text, [Char])]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Text
name [Char]
type_ -> (Text -> Text
sanitize Text
name, [Char]
type_)) [Text]
names [[Char]]
types
in ([[Dec]] -> [Dec]) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> Q a -> Q b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Dec]] -> [Dec]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Q [[Dec]] -> DecsQ) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> a -> b
$ [(Text, [Char])] -> ((Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(Text, [Char])]
specs (((Text, [Char]) -> DecsQ) -> Q [[Dec]])
-> ((Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall a b. (a -> b) -> a -> b
$ \(Text
nm, [Char]
tyStr) -> do
Text -> Q () -> Q ()
forall a b. Show a => a -> b -> b
traceShow Text
nm (() -> Q ()
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())
Type
ty <- [[Char]] -> Q Type
typeFromString ([Char] -> [[Char]]
words [Char]
tyStr)
let n :: Name
n = [Char] -> Name
mkName (Text -> [Char]
T.unpack Text
nm)
Dec
sig <- Name -> Q Type -> Q Dec
forall (m :: * -> *). Quote m => Name -> m Type -> m Dec
sigD Name
n [t| Expr $(Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Type
ty) |]
Dec
val <- Q Pat -> Q Body -> [Q Dec] -> Q Dec
forall (m :: * -> *).
Quote m =>
m Pat -> m Body -> [m Dec] -> m Dec
valD (Name -> Q Pat
forall (m :: * -> *). Quote m => Name -> m Pat
varP Name
n) (Q Exp -> Q Body
forall (m :: * -> *). Quote m => m Exp -> m Body
normalB [| col $(Text -> Q Exp
forall t (m :: * -> *). (Lift t, Quote m) => t -> m Exp
forall (m :: * -> *). Quote m => Text -> m Exp
TH.lift Text
nm) |]) []
[Dec] -> DecsQ
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Dec
sig, Dec
val]