{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
module DataFrame.Functions where

import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (DataFrame(..), unsafeGetColumn)
import DataFrame.Internal.Expression (Expr(..), UExpr(..))

import           Control.Monad
import           Data.Function
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU
import qualified Data.Vector as VB
import           Language.Haskell.TH
import qualified Language.Haskell.TH.Syntax as TH
import qualified Data.Char as Char
import Debug.Trace (traceShow)
import Type.Reflection (typeRep)

col :: Columnable a => T.Text -> Expr a
col :: forall a. Columnable a => Text -> Expr a
col = Text -> Expr a
forall a. Columnable a => Text -> Expr a
Col

as :: Columnable a => Expr a -> T.Text -> (T.Text, UExpr)
as :: forall a. Columnable a => Expr a -> Text -> (Text, UExpr)
as Expr a
expr Text
name = (Text
name, Expr a -> UExpr
forall a. Columnable a => Expr a -> UExpr
Wrap Expr a
expr)

lit :: Columnable a => a -> Expr a
lit :: forall a. Columnable a => a -> Expr a
lit = a -> Expr a
forall a. Columnable a => a -> Expr a
Lit

lift :: (Columnable a, Columnable b) => (a -> b) -> Expr a -> Expr b
lift :: forall a b.
(Columnable a, Columnable b) =>
(a -> b) -> Expr a -> Expr b
lift = Text -> (a -> b) -> Expr a -> Expr b
forall a b.
(Columnable a, Columnable b) =>
Text -> (b -> a) -> Expr b -> Expr a
Apply Text
"udf"

lift2 :: (Columnable c, Columnable b, Columnable a) => (c -> b -> a) -> Expr c -> Expr b -> Expr a 
lift2 :: forall c b a.
(Columnable c, Columnable b, Columnable a) =>
(c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 = Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"udf"

eq :: (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq :: forall a. (Columnable a, Eq a) => Expr a -> Expr a -> Expr Bool
eq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"eq" a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(==)

lt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
lt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"lt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(<)

gt :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt :: forall a. (Columnable a, Ord a) => Expr a -> Expr a -> Expr Bool
gt = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"gt" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(>)

leq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
leq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
leq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"leq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(<=)

geq :: (Columnable a, Ord a, Eq a) => Expr a -> Expr a -> Expr Bool
geq :: forall a.
(Columnable a, Ord a, Eq a) =>
Expr a -> Expr a -> Expr Bool
geq = Text -> (a -> a -> Bool) -> Expr a -> Expr a -> Expr Bool
forall c b a.
(Columnable c, Columnable b, Columnable a) =>
Text -> (c -> b -> a) -> Expr c -> Expr b -> Expr a
BinOp Text
"geq" a -> a -> Bool
forall a. Ord a => a -> a -> Bool
(>=)

count :: Columnable a => Expr a -> Expr Int
count :: forall a. Columnable a => Expr a -> Expr Int
count (Col Text
name) = Text
-> Text
-> (forall (v :: * -> *) b.
    (Vector v b, Columnable b) =>
    v b -> Int)
-> Expr Int
forall a.
Columnable a =>
Text
-> Text
-> (forall (v :: * -> *) b. (Vector v b, Columnable b) => v b -> a)
-> Expr a
GeneralAggregate Text
name Text
"count" v b -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
forall (v :: * -> *) b. (Vector v b, Columnable b) => v b -> Int
VG.length
count Expr a
_ = [Char] -> Expr Int
forall a. HasCallStack => [Char] -> a
error [Char]
"Argument can only be a column reference not an unevaluated expression"

minimum :: Columnable a => Expr a -> Expr a
minimum :: forall a. Columnable a => Expr a -> Expr a
minimum (Col Text
name) = Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
ReductionAggregate Text
name Text
"minimum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
min

maximum :: Columnable a => Expr a -> Expr a
maximum :: forall a. Columnable a => Expr a -> Expr a
maximum (Col Text
name) = Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
forall a.
Columnable a =>
Text
-> Text -> (forall a1. Columnable a1 => a1 -> a1 -> a1) -> Expr a
ReductionAggregate Text
name Text
"maximum" a1 -> a1 -> a1
forall a. Ord a => a -> a -> a
forall a1. Columnable a1 => a1 -> a1 -> a1
max

sum :: forall a . (Columnable a, Num a, VU.Unbox a) => Expr a -> Expr a
sum :: forall a. (Columnable a, Num a, Unbox a) => Expr a -> Expr a
sum (Col Text
name) = Text -> Text -> (Vector a -> a) -> Expr a
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Text -> Text -> (Vector b -> a) -> Expr a
NumericAggregate Text
name Text
"sum" Vector a -> a
forall (v :: * -> *) a. (Vector v a, Num a) => v a -> a
VG.sum

mean :: (Columnable a, Num a, VU.Unbox a) => Expr a -> Expr Double
mean :: forall a. (Columnable a, Num a, Unbox a) => Expr a -> Expr Double
mean (Col Text
name) = let
        mean' :: v Double -> Double
mean' v Double
samp = let
                (!Double
total, !Int
n) = ((Double, Int) -> Double -> (Double, Int))
-> (Double, Int) -> v Double -> (Double, Int)
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
VG.foldl' (\(!Double
total, !Int
n) Double
v -> (Double
total Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
v, Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1))  (Double
0 :: Double, Int
0 :: Int) v Double
samp
            in Double
total Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
    in Text -> Text -> (Vector Double -> Double) -> Expr Double
forall a b.
(Columnable a, Columnable b, Unbox a, Unbox b, Num a, Num b) =>
Text -> Text -> (Vector b -> a) -> Expr a
NumericAggregate Text
name Text
"mean" Vector Double -> Double
forall {v :: * -> *}. Vector v Double => v Double -> Double
mean'

-- See Section 2.4 of the Haskell Report https://www.haskell.org/definition/haskell2010.pdf
isReservedId :: T.Text -> Bool
isReservedId :: Text -> Bool
isReservedId Text
t = case Text
t of
  Text
"case"     -> Bool
True
  Text
"class"    -> Bool
True
  Text
"data"     -> Bool
True
  Text
"default"  -> Bool
True
  Text
"deriving" -> Bool
True
  Text
"do"       -> Bool
True
  Text
"else"     -> Bool
True
  Text
"foreign"  -> Bool
True
  Text
"if"       -> Bool
True
  Text
"import"   -> Bool
True
  Text
"in"       -> Bool
True
  Text
"infix"    -> Bool
True
  Text
"infixl"   -> Bool
True
  Text
"infixr"   -> Bool
True
  Text
"instance" -> Bool
True
  Text
"let"      -> Bool
True
  Text
"module"   -> Bool
True
  Text
"newtype"  -> Bool
True
  Text
"of"       -> Bool
True
  Text
"then"     -> Bool
True
  Text
"type"     -> Bool
True
  Text
"where"    -> Bool
True
  Text
_          -> Bool
False

isVarId :: T.Text -> Bool
isVarId :: Text -> Bool
isVarId Text
t = case Text -> Maybe (Char, Text)
T.uncons Text
t of
-- We might want to check  c == '_' || Char.isLower c
-- since the haskell report considers '_' a lowercase character
-- However, to prevent an edge case where a user may have a
-- "Name" and an "_Name_" in the same scope, wherein we'd end up
-- with duplicate "_Name_"s, we eschew the check for '_' here.
  Just (Char
c, Text
_) -> Char -> Bool
Char.isLower Char
c Bool -> Bool -> Bool
&& Char -> Bool
Char.isAlpha Char
c
  Maybe (Char, Text)
Nothing -> Bool
False

isHaskellIdentifier :: T.Text -> Bool
isHaskellIdentifier :: Text -> Bool
isHaskellIdentifier Text
t =  Bool -> Bool
not (Text -> Bool
isVarId Text
t) Bool -> Bool -> Bool
|| Text -> Bool
isReservedId Text
t

sanitize :: T.Text -> T.Text
sanitize :: Text -> Text
sanitize Text
t
  | Bool
isValid = Text
t
  | Text -> Bool
isHaskellIdentifier Text
t' = Text
"_" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t' Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"_"
  | Bool
otherwise = Text
t'
  where
    isValid :: Bool
isValid
      =  Bool -> Bool
not (Text -> Bool
isHaskellIdentifier Text
t)
      Bool -> Bool -> Bool
&& Text -> Bool
isVarId Text
t
      Bool -> Bool -> Bool
&& (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
Char.isAlphaNum Text
t
    t' :: Text
t' = (Char -> Char) -> Text -> Text
T.map Char -> Char
replaceInvalidCharacters (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
parentheses) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
t
    replaceInvalidCharacters :: Char -> Char
replaceInvalidCharacters Char
c
      | Char -> Bool
Char.isUpper Char
c = Char -> Char
Char.toLower Char
c
      | Char -> Bool
Char.isSpace Char
c = Char
'_'
      | Char -> Bool
Char.isPunctuation Char
c = Char
'_' -- '-' will also become a '_'
      | Char -> Bool
Char.isSymbol Char
c = Char
'_'
      | Char -> Bool
Char.isAlphaNum Char
c = Char
c -- Blanket condition
      | Bool
otherwise = Char
'_' -- If we're unsure we'll default to an underscore
    parentheses :: Char -> Bool
parentheses Char
c = case Char
c of
      Char
'(' -> Bool
True
      Char
')' -> Bool
True
      Char
'{' -> Bool
True
      Char
'}' -> Bool
True
      Char
'[' -> Bool
True
      Char
']' -> Bool
True
      Char
_   -> Bool
False

typeFromString :: [String] -> Q Type
typeFromString :: [[Char]] -> Q Type
typeFromString []  = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"No type specified"
typeFromString [[Char]
t] = do
  Maybe Name
maybeType <- [Char] -> Q (Maybe Name)
lookupTypeName [Char]
t
  case Maybe Name
maybeType of
    Just Name
name -> Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Name -> Type
ConT Name
name)
    Maybe Name
Nothing -> [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
t
typeFromString [[Char]
tycon,[Char]
t1] = do
  Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
  Type
inner <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
  Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT Type
outer Type
inner)
typeFromString [[Char]
tycon,[Char]
t1,[Char]
t2] = do
  Type
outer <- [[Char]] -> Q Type
typeFromString [[Char]
tycon]
  Type
lhs <- [[Char]] -> Q Type
typeFromString [[Char]
t1]
  Type
rhs <- [[Char]] -> Q Type
typeFromString [[Char]
t2]
  Type -> Q Type
forall a. a -> Q a
forall (m :: * -> *) a. Monad m => a -> m a
return (Type -> Type -> Type
AppT (Type -> Type -> Type
AppT Type
outer Type
lhs) Type
rhs)
typeFromString [[Char]]
s = [Char] -> Q Type
forall a. [Char] -> Q a
forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail ([Char] -> Q Type) -> [Char] -> Q Type
forall a b. (a -> b) -> a -> b
$ [Char]
"Unsupported type: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ([[Char]] -> [Char]
unwords [[Char]]
s)

declareColumns :: DataFrame -> DecsQ
declareColumns :: DataFrame -> DecsQ
declareColumns DataFrame
df = let
        names :: [Text]
names = (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd)([(Text, Int)] -> [(Text, Int)])
-> (DataFrame -> [(Text, Int)]) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (Map Text Int -> [(Text, Int)])
-> (DataFrame -> Map Text Int) -> DataFrame -> [(Text, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DataFrame -> Map Text Int
columnIndices) DataFrame
df
        types :: [[Char]]
types = (Text -> [Char]) -> [Text] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (Column -> [Char]
columnTypeString (Column -> [Char]) -> (Text -> Column) -> Text -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> DataFrame -> Column
`unsafeGetColumn` DataFrame
df)) [Text]
names
        specs :: [(Text, [Char])]
specs = (Text -> [Char] -> (Text, [Char]))
-> [Text] -> [[Char]] -> [(Text, [Char])]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Text
name [Char]
type_ -> (Text -> Text
sanitize Text
name, [Char]
type_)) [Text]
names [[Char]]
types
    in ([[Dec]] -> [Dec]) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> Q a -> Q b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [[Dec]] -> [Dec]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat (Q [[Dec]] -> DecsQ) -> Q [[Dec]] -> DecsQ
forall a b. (a -> b) -> a -> b
$ [(Text, [Char])] -> ((Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(Text, [Char])]
specs (((Text, [Char]) -> DecsQ) -> Q [[Dec]])
-> ((Text, [Char]) -> DecsQ) -> Q [[Dec]]
forall a b. (a -> b) -> a -> b
$ \(Text
nm, [Char]
tyStr) -> do
        Text -> Q () -> Q ()
forall a b. Show a => a -> b -> b
traceShow Text
nm (() -> Q ()
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())
        Type
ty  <- [[Char]] -> Q Type
typeFromString ([Char] -> [[Char]]
words [Char]
tyStr)
        let n :: Name
n  = [Char] -> Name
mkName (Text -> [Char]
T.unpack Text
nm)
        Dec
sig <- Name -> Q Type -> Q Dec
forall (m :: * -> *). Quote m => Name -> m Type -> m Dec
sigD Name
n [t| Expr $(Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Type
ty) |]
        Dec
val <- Q Pat -> Q Body -> [Q Dec] -> Q Dec
forall (m :: * -> *).
Quote m =>
m Pat -> m Body -> [m Dec] -> m Dec
valD (Name -> Q Pat
forall (m :: * -> *). Quote m => Name -> m Pat
varP Name
n) (Q Exp -> Q Body
forall (m :: * -> *). Quote m => m Exp -> m Body
normalB [| col $(Text -> Q Exp
forall t (m :: * -> *). (Lift t, Quote m) => t -> m Exp
forall (m :: * -> *). Quote m => Text -> m Exp
TH.lift Text
nm) |]) []
        [Dec] -> DecsQ
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Dec
sig, Dec
val]