{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE UndecidableInstances #-}

module DataFrame.Functions (module DataFrame.Functions, module DataFrame.Operators) where

import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame (
    DataFrame (..),
    empty,
    unsafeGetColumn,
 )
import DataFrame.Internal.Expression hiding (normalize)
import DataFrame.Internal.Statistics
import DataFrame.Operations.Core

import Control.Applicative
import Control.Monad
import Control.Monad.IO.Class
import qualified Data.Char as Char
import Data.Either
import Data.Function
import Data.Functor
import Data.Int
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Maybe as Maybe
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Time
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Data.Word
import qualified DataFrame.IO.CSV as CSV
import qualified DataFrame.IO.Parquet as Parquet
import DataFrame.IO.Parquet.Thrift
import DataFrame.IO.Parquet.Types (columnNullCount)
import DataFrame.Internal.Nullable (
    BaseType,
    NullLift1Op (applyNull1),
    NullLift1Result,
    NullLift2Op (applyNull2),
    NullLift2Result,
 )
import DataFrame.Operators
import Debug.Trace (trace)
import Language.Haskell.TH
import qualified Language.Haskell.TH.Syntax as TH
import System.Directory (doesDirectoryExist)
import System.FilePath ((</>))
import System.FilePath.Glob (glob)
import Text.Regex.TDFA
import Prelude hiding (maximum, minimum)
import Prelude as P

{- | Lift a pure unary function into a column expression.

The function is wrapped in a 'Unary' node whose operator is named
@\"unaryUdf\"@ and has no display symbol.
-}
lift :: (Columnable a, Columnable b) => (a -> b) -> Expr a -> Expr b
lift f =
    Unary (MkUnaryOp{unaryFn = f, unaryName = "unaryUdf", unarySymbol = Nothing})

{- | Lift a pure binary function into a column expression.

The function is wrapped in a 'Binary' node whose operator is named
@\"binaryUdf\"@, has no display symbol, is flagged as non-commutative and
carries precedence @0@.
-}
lift2 ::
    (Columnable c, Columnable b, Columnable a) =>
    (c -> b -> a) -> Expr c -> Expr b -> Expr a
lift2 f =
    Binary
        ( MkBinaryOp
            { binaryFn = f
            , binaryName = "binaryUdf"
            , binarySymbol = Nothing
            , binaryCommutative = False
            , binaryPrecedence = 0
            }
        )

{- | Lift a unary function over a nullable or non-nullable column expression.
When the input is @Maybe a@, 'Nothing' short-circuits (like 'fmap').
When the input is plain @a@, the function is applied directly.

The return type is inferred via 'NullLift1Result': no annotation needed.
The resulting 'Unary' operator is named @\"nullLift\"@ and has no display
symbol; the null handling itself is delegated to 'applyNull1'.
-}
nullLift ::
    (NullLift1Op a r (NullLift1Result a r), Columnable (NullLift1Result a r)) =>
    (BaseType a -> r) ->
    Expr a ->
    Expr (NullLift1Result a r)
nullLift f =
    Unary
        ( MkUnaryOp
            { unaryFn = applyNull1 f
            , unaryName = "nullLift"
            , unarySymbol = Nothing
            }
        )

{- | Lift a binary function over nullable or non-nullable column expressions.
Any 'Nothing' operand short-circuits to 'Nothing' in the result.

The return type is inferred via 'NullLift2Result': no annotation needed.
The resulting 'Binary' operator is named @\"nullLift2\"@, has no display
symbol, is flagged as non-commutative and carries precedence @0@; the null
handling itself is delegated to 'applyNull2'.
-}
nullLift2 ::
    (NullLift2Op a b r (NullLift2Result a b r), Columnable (NullLift2Result a b r)) =>
    (BaseType a -> BaseType b -> r) ->
    Expr a ->
    Expr b ->
    Expr (NullLift2Result a b r)
nullLift2 f =
    Binary
        ( MkBinaryOp
            { binaryFn = applyNull2 f
            , binaryName = "nullLift2"
            , binarySymbol = Nothing
            , binaryCommutative = False
            , binaryPrecedence = 0
            }
        )

{- | Lenient numeric \/ text coercion returning @Maybe a@.  Looks up column
@name@ and coerces its values to @a@.  Values that cannot be converted
(parse failures, type mismatches) become 'Nothing'; successfully converted
values are wrapped in 'Just'.  Existing 'Nothing' in optional source columns
stays as 'Nothing'.

Implemented as a 'CastWith' node tagged @\"cast\"@ whose handler maps every
'Left' to 'Nothing' and every 'Right' to 'Just'.
-}
cast :: forall a. (Columnable a) => T.Text -> Expr (Maybe a)
cast name = CastWith name "cast" (either (const Nothing) Just)

{- | Lenient coercion that substitutes a default for unconvertible values.
Looks up column @name@, coerces its values to @a@, and uses @def@ wherever
conversion fails or the source value is 'Nothing'.

The 'CastWith' tag embeds @show def@ (@\"castWithDefault:\<def\>\"@), so
casts with different defaults carry different tags.
-}
castWithDefault :: forall a. (Columnable a) => a -> T.Text -> Expr a
castWithDefault def name =
    CastWith name ("castWithDefault:" <> T.pack (show def)) (fromRight def)

{- | Lenient coercion returning @Either T.Text a@.  Successfully converted
values are 'Right'; values that cannot be parsed are kept as 'Left' with
their original string representation, so the caller can inspect or handle
them downstream.  Existing 'Nothing' in optional source columns becomes
@Left \"null\"@.

Implemented as a 'CastWith' node tagged @\"castEither\"@; the handler only
repacks the 'String' carried by a 'Left' into 'T.Text'.
-}
castEither :: forall a. (Columnable a) => T.Text -> Expr (Either T.Text a)
castEither name = CastWith name "castEither" (either (Left . T.pack) Right)

{- | Coercion for assertedly non-nullable, fully convertible columns.
Looks up column @name@ and coerces its values to @a@.  Every 'Left' handed
to the handler is replaced by a call to 'error', so evaluation crashes as
soon as such a value is forced.

NOTE(review): the handler cannot distinguish a missing value from a failed
conversion: any 'Left' raises the same
@\"unsafeCast: unexpected Nothing in column\"@ message.  The message is kept
unchanged for compatibility.

For non-nullable and fully-populated nullable columns no cost is paid: the
'error' thunk is only forced when a 'Left' is actually encountered.
-}
unsafeCast :: forall a. (Columnable a) => T.Text -> Expr a
unsafeCast name =
    CastWith
        name
        "unsafeCast"
        (fromRight (error "unsafeCast: unexpected Nothing in column"))

{- | Expression-level counterpart of @cast@: coerce the values produced by an
arbitrary source expression to @b@, yielding 'Nothing' wherever the
conversion result is a 'Left' and 'Just' otherwise.

Implemented as a 'CastExprWith' node tagged @\"castExpr\"@.  The target type
@b@ is normally chosen with a type application.
-}
castExpr ::
    forall b src. (Columnable b, Columnable src) => Expr src -> Expr (Maybe b)
castExpr =
    CastExprWith @b @(Maybe b) @src "castExpr" (either (const Nothing) Just)

{- | Expression-level counterpart of @castWithDefault@: coerce the values of a
source expression to @b@, substituting @def@ wherever the conversion result
is a 'Left'.

The 'CastExprWith' tag embeds @show def@
(@\"castExprWithDefault:\<def\>\"@).
-}
castExprWithDefault ::
    forall b src. (Columnable b, Columnable src) => b -> Expr src -> Expr b
castExprWithDefault def =
    CastExprWith @b @b @src
        ("castExprWithDefault:" <> T.pack (show def))
        (fromRight def)

{- | Expression-level counterpart of @castEither@: coerce the values of a
source expression to @b@, keeping failures as 'Left' (with the 'String'
payload repacked as 'T.Text') and successes as 'Right'.

Implemented as a 'CastExprWith' node tagged @\"castExprEither\"@.
-}
castExprEither ::
    forall b src.
    (Columnable b, Columnable src) => Expr src -> Expr (Either T.Text b)
castExprEither =
    CastExprWith @b @(Either T.Text b) @src
        "castExprEither"
        (either (Left . T.pack) Right)

{- | Expression-level counterpart of @unsafeCast@: coerce the values of a
source expression to @b@, calling 'error' for every 'Left' conversion
result.

NOTE(review): any 'Left' triggers the
@\"unsafeCastExpr: unexpected Nothing in column\"@ message, whatever the
'Left' carried.  The 'error' thunk is only forced when a 'Left' is actually
encountered.
-}
unsafeCastExpr ::
    forall b src. (Columnable b, Columnable src) => Expr src -> Expr b
unsafeCastExpr =
    CastExprWith @b @b @src
        "unsafeCastExpr"
        (fromRight (error "unsafeCastExpr: unexpected Nothing in column"))

{- | Convert any 'Real' column expression to 'Double' using 'realToFrac'.

The resulting 'Unary' operator is named @\"toDouble\"@ and has no display
symbol.
-}
toDouble :: (Columnable a, Real a) => Expr a -> Expr Double
toDouble =
    Unary
        ( MkUnaryOp
            { unaryFn = realToFrac
            , unaryName = "toDouble"
            , unarySymbol = Nothing
            }
        )

infix 8 `div`

{- | Element-wise integer division of two column expressions, backed by
'Prelude.div'.

The operator is registered through @lift2Decorated@ (defined outside this
view) as @\"div\"@ with symbol @\"//\"@, flagged non-commutative, with
precedence @7@.

NOTE(review): the Haskell fixity declared above (@infix 8@) differs from the
precedence value @7@ passed to @lift2Decorated@; confirm this is intended.
-}
div :: (Integral a, Columnable a) => Expr a -> Expr a -> Expr a
div = lift2Decorated Prelude.div "div" (Just "//") False 7

{- | Element-wise modulus of two column expressions, backed by 'Prelude.mod'.

Registered through @lift2Decorated@ as @\"mod\"@ with no display symbol,
flagged non-commutative, with precedence @7@.
-}
mod :: (Integral a, Columnable a) => Expr a -> Expr a -> Expr a
mod = lift2Decorated Prelude.mod "mod" Nothing False 7

{- | Element-wise equality of two column expressions.

Registered through @lift2Decorated@ as @\"eq\"@ with symbol @\"==\"@,
flagged commutative, with precedence @4@.  The @a ~ BaseType a@ constraint
presumably restricts this to non-nullable element types (TODO confirm
against 'BaseType').
-}
eq :: (Columnable a, Eq a, a ~ BaseType a) => Expr a -> Expr a -> Expr Bool
eq = lift2Decorated (==) "eq" (Just "==") True 4

{- | Element-wise strict less-than comparison of two column expressions.

Registered through @lift2Decorated@ as @\"lt\"@ with symbol @\"<\"@,
flagged non-commutative, with precedence @4@.
-}
lt :: (Columnable a, Ord a, a ~ BaseType a) => Expr a -> Expr a -> Expr Bool
lt = lift2Decorated (<) "lt" (Just "<") False 4

{- | Element-wise strict greater-than comparison of two column expressions.

Registered through @lift2Decorated@ as @\"gt\"@ with symbol @\">\"@,
flagged non-commutative, with precedence @4@.
-}
gt :: (Columnable a, Ord a, a ~ BaseType a) => Expr a -> Expr a -> Expr Bool
gt = lift2Decorated (>) "gt" (Just ">") False 4

{- | Element-wise less-than-or-equal comparison of two column expressions.

Registered through @lift2Decorated@ as @\"leq\"@ with symbol @\"<=\"@,
flagged non-commutative, with precedence @4@.
-}
leq ::
    (Columnable a, Ord a, Eq a, a ~ BaseType a) => Expr a -> Expr a -> Expr Bool
leq = lift2Decorated (<=) "leq" (Just "<=") False 4

{- | Element-wise greater-than-or-equal comparison of two column expressions.

Registered through @lift2Decorated@ as @\"geq\"@ with symbol @\">=\"@,
flagged non-commutative, with precedence @4@.
-}
geq ::
    (Columnable a, Ord a, Eq a, a ~ BaseType a) => Expr a -> Expr a -> Expr Bool
geq = lift2Decorated (>=) "geq" (Just ">=") False 4

{- | Logical conjunction of two boolean column expressions.

A monomorphic alias for the '.&&' operator, specialised to plain
(non-nullable) 'Bool' operands.
-}
and :: Expr Bool -> Expr Bool -> Expr Bool
and = (.&&)

{- | Logical disjunction of two boolean column expressions.

A monomorphic alias for the '.||' operator, specialised to plain
(non-nullable) 'Bool' operands.
-}
or :: Expr Bool -> Expr Bool -> Expr Bool
or = (.||)

{- | Logical negation of a boolean column expression, backed by
'Prelude.not'.

The resulting 'Unary' operator is named @\"not\"@ and uses the display
symbol @\"~\"@.
-}
not :: Expr Bool -> Expr Bool
not =
    Unary
        ( MkUnaryOp
            { unaryFn = Prelude.not
            , unaryName = "not"
            , unarySymbol = Just "~"
            }
        )

{- | Aggregate that counts the values of a column expression.

Built as a 'MergeAgg' named @\"count\"@: the accumulator starts at @0@, each
element adds one regardless of its value, partial counts are merged with
'(+)', and the final accumulator is returned unchanged.
-}
count :: (Columnable a) => Expr a -> Expr Int
count = Agg (MergeAgg "count" (0 :: Int) (\c _ -> c + 1) (+) id)

{- | Aggregate that gathers the values of a column expression into a list.

Built as a 'FoldAgg' named @\"collect\"@ seeded with the empty list.  Each
element is consed onto the front of the accumulator, so the element folded
last ends up at the head of the result.
-}
collect :: (Columnable a) => Expr a -> Expr [a]
collect = Agg (FoldAgg "collect" (Just []) (flip (:)))

{- | Aggregate returning the most frequent value of a column expression.

Built as a 'CollectAgg' named @\"mode\"@.  Occurrences are tallied into a
'M.Map' with a strict left fold, and the key with the largest tally is
selected with 'L.maximumBy'.

NOTE: 'L.maximumBy' is partial, so this aggregate calls 'error' when it is
handed an empty vector.  When several values share the highest frequency the
winner is whatever 'L.maximumBy' picks from the key-ordered association
list (per the @base@ documentation, the last maximal element).
-}
mode :: (Ord a, Columnable a, Eq a) => Expr a -> Expr a
mode =
    Agg
        ( CollectAgg
            "mode"
            ( fst
                . L.maximumBy (compare `on` snd)
                . M.toList
                . V.foldl' (\m e -> M.insertWith (+) e 1 m) M.empty
            )
        )

{- | Aggregate returning the smallest value of a column expression.

Built as a 'FoldAgg' named @\"minimum\"@ that combines values with
'Prelude.min'.  No seed is supplied ('Nothing'); presumably the evaluator
seeds the fold with the first element (TODO confirm against 'FoldAgg').
-}
minimum :: (Columnable a, Ord a) => Expr a -> Expr a
minimum = Agg (FoldAgg "minimum" Nothing Prelude.min)

{- | Aggregate returning the largest value of a column expression.

Built as a 'FoldAgg' named @\"maximum\"@ that combines values with
'Prelude.max'.  No seed is supplied ('Nothing'); presumably the evaluator
seeds the fold with the first element (TODO confirm against 'FoldAgg').
-}
maximum :: (Columnable a, Ord a) => Expr a -> Expr a
maximum = Agg (FoldAgg "maximum" Nothing Prelude.max)

{- | Aggregate returning the sum of a numeric column expression.

Built as a 'FoldAgg' named @\"sum\"@ that combines values with '(+)'.  No
seed is supplied ('Nothing'); presumably the evaluator seeds the fold with
the first element (TODO confirm against 'FoldAgg').  Specialised for
'Double' and 'Int' via the pragmas below.
-}
sum :: forall a. (Columnable a, Num a) => Expr a -> Expr a
sum = Agg (FoldAgg "sum" Nothing (+))
{-# SPECIALIZE DataFrame.Functions.sum :: Expr Double -> Expr Double #-}
{-# SPECIALIZE DataFrame.Functions.sum :: Expr Int -> Expr Int #-}
{-# INLINEABLE DataFrame.Functions.sum #-}

{- | Aggregate returning the sum of the present values of a nullable numeric
column expression.

Built as a 'CollectAgg' named @\"sumMaybe\"@: 'Nothing' entries are dropped
with 'Maybe.catMaybes' and the remaining values are added with 'P.sum', so
an all-'Nothing' (or empty) input yields @0@.
-}
sumMaybe :: forall a. (Columnable a, Num a) => Expr (Maybe a) -> Expr a
sumMaybe = Agg (CollectAgg "sumMaybe" (P.sum . Maybe.catMaybes . V.toList))

{- | Aggregate returning the arithmetic mean of a column expression as a
'Double'.

Built as a 'MergeAgg' named @\"mean\"@ over a 'MeanAcc' (running sum and
element count):

* the accumulator starts at sum @0.0@, count @0@;
* each element adds @realToFrac x@ to the sum and one to the count;
* two partial accumulators merge by adding sums and counts;
* the result is @sum \/ count@, or @0 \/ 0@ (NaN) when no element was seen.
-}
mean :: (Columnable a, Real a) => Expr a -> Expr Double
mean =
    Agg
        ( MergeAgg
            "mean"
            (MeanAcc 0.0 0)
            (\(MeanAcc s c) x -> MeanAcc (s + realToFrac x) (c + 1))
            (\(MeanAcc s1 c1) (MeanAcc s2 c2) -> MeanAcc (s1 + s2) (c1 + c2))
            (\(MeanAcc s c) -> if c == 0 then 0 / 0 else s / fromIntegral c)
        )

{- | Aggregate returning the mean of the present values of a nullable column
expression.

Built as a 'CollectAgg' named @\"meanMaybe\"@: 'Nothing' entries are dropped
by 'optionalToDoubleVector' and the remaining values are passed to @mean'@.
-}
meanMaybe :: forall a. (Columnable a, Real a) => Expr (Maybe a) -> Expr Double
meanMaybe = Agg (CollectAgg "meanMaybe" (mean' . optionalToDoubleVector))

{- | Aggregate returning the variance of a column expression.

Built as a 'CollectAgg' named @\"variance\"@ that delegates to @variance'@
(imported from outside this view; whether it is the sample or the population
variance is decided there).
-}
variance :: (Columnable a, Real a, VU.Unbox a) => Expr a -> Expr Double
variance = Agg (CollectAgg "variance" variance')

{- | Aggregate returning the median of a column expression.

Built as a 'CollectAgg' named @\"median\"@ that delegates to @median'@.
-}
median :: (Columnable a, Real a, VU.Unbox a) => Expr a -> Expr Double
median = Agg (CollectAgg "median" median')

{- | Aggregate returning the median of the present values of a nullable
column expression.

Built as a 'CollectAgg' named @\"medianMaybe\"@: 'Nothing' entries are
dropped by 'optionalToDoubleVector' and the remaining values are passed to
@median'@.

The aggregation was previously labelled @\"meanMaybe\"@ (a copy-paste slip),
which made it indistinguishable by name from 'meanMaybe'.
-}
medianMaybe :: (Columnable a, Real a) => Expr (Maybe a) -> Expr Double
medianMaybe = Agg (CollectAgg "medianMaybe" (median' . optionalToDoubleVector))

{- | Drop the 'Nothing' entries of a boxed vector and convert the remaining
values to an unboxed vector of 'Double' via 'realToFrac'.

The values are accumulated by consing onto a list during a left fold, so the
output is in the reverse of the input order.  That order is kept as is.
-}
optionalToDoubleVector :: (Real a) => V.Vector (Maybe a) -> VU.Vector Double
optionalToDoubleVector = VU.fromList . V.foldl' keepPresent []
  where
    -- Prepend the converted value when present; skip 'Nothing' entries.
    keepPresent acc = maybe acc (\x -> realToFrac x : acc)

{- | Aggregate returning the @n@-th percentile of a 'Double' column
expression.

Built as a 'CollectAgg' whose name is @\"percentile \"@ followed by
@show n@, delegating to @percentile' n@.  The valid range of @n@ is
determined by @percentile'@ (presumably 0–100; TODO confirm).
-}
percentile :: Int -> Expr Double -> Expr Double
percentile n =
    Agg
        ( CollectAgg
            (T.pack $ "percentile " ++ show n)
            (percentile' n)
        )

{- | Aggregate returning the standard deviation of a column expression,
computed as the square root of @variance'@.

Built as a 'CollectAgg' named @\"stddev\"@.
-}
stddev :: (Columnable a, Real a, VU.Unbox a) => Expr a -> Expr Double
stddev = Agg (CollectAgg "stddev" (sqrt . variance'))

{- | Aggregate returning the standard deviation of the present values of a
nullable column expression.

Built as a 'CollectAgg' named @\"stddevMaybe\"@: 'Nothing' entries are
dropped by 'optionalToDoubleVector', then the square root of @variance'@ is
taken.
-}
stddevMaybe :: forall a. (Columnable a, Real a) => Expr (Maybe a) -> Expr Double
stddevMaybe =
    Agg (CollectAgg "stddevMaybe" (sqrt . variance' . optionalToDoubleVector))

{- | Standardise a 'Double' column expression: subtract its 'mean' and divide
by its 'stddev'.

The result is an expression tree combining the input with the two
aggregates; no guard is applied for a zero standard deviation.
-}
zScore :: Expr Double -> Expr Double
zScore c = (c - mean c) / stddev c

{- | Raise every value of a column expression to a fixed integer power using
'(^)'.  The exponent is embedded as a literal ('Lit').

Registered through @lift2Decorated@ as @\"pow\"@ with symbol @\"^\"@ and
precedence @8@.  The operator is flagged as non-commutative: @x ^ n@ and
@n ^ x@ are different values, so the operands must never be treated as
interchangeable (it was previously flagged commutative).

NOTE: '(^)' raises an error for a negative exponent.
-}
pow :: (Columnable a, Num a) => Expr a -> Int -> Expr a
pow expr i = lift2Decorated (^) "pow" (Just "^") False 8 expr (Lit i)

{- | Rectified linear unit: replace every value below zero with zero
(@max 0 x@).

Registered through @liftDecorated@ (defined outside this view) as
@\"relu\"@ with no display symbol.
-}
relu :: (Columnable a, Num a, Ord a) => Expr a -> Expr a
relu = liftDecorated (Prelude.max 0) "relu" Nothing

-- | Element-wise minimum of two expressions, computed with 'Prelude.min'.
min :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
min = lift2Decorated Prelude.min "min" Nothing True 1

-- | Element-wise maximum of two expressions, computed with 'Prelude.max'.
max :: (Columnable a, Ord a) => Expr a -> Expr a -> Expr a
max = lift2Decorated Prelude.max "max" Nothing True 1

-- | Aggregate an expression with a user-supplied left fold.
--
-- @reduce expr start f@ builds a fold aggregation ('FoldAgg') named
-- @"foldUdf"@ that is seeded with @start@ and combines the accumulator with
-- each value using @f@.
reduce ::
    forall a b.
    (Columnable a, Columnable b) => Expr b -> a -> (a -> b -> a) -> Expr a
reduce expr start f = Agg (FoldAgg "foldUdf" (Just start) f) expr

-- | Wrap every value of an expression in 'Just'.
toMaybe :: (Columnable a) => Expr a -> Expr (Maybe a)
toMaybe = liftDecorated Just "toMaybe" Nothing

-- | Replace every 'Nothing' in an optional expression with the default @d@
-- and unwrap the 'Just' values (element-wise 'Maybe.fromMaybe').
fromMaybe :: (Columnable a) => a -> Expr (Maybe a) -> Expr a
fromMaybe d = liftDecorated (Maybe.fromMaybe d) "fromMaybe" Nothing

-- | Element-wise test that an optional value is present ('Maybe.isJust').
isJust :: (Columnable a) => Expr (Maybe a) -> Expr Bool
isJust = liftDecorated Maybe.isJust "isJust" Nothing

-- | Element-wise test that an optional value is missing ('Maybe.isNothing').
isNothing :: (Columnable a) => Expr (Maybe a) -> Expr Bool
isNothing = liftDecorated Maybe.isNothing "isNothing" Nothing

-- | Unwrap every value of an optional expression with 'Maybe.fromJust'.
--
-- This is partial: 'Maybe.fromJust' raises an error on 'Nothing'. Use
-- 'fromMaybe' when a default value is available.
fromJust :: (Columnable a) => Expr (Maybe a) -> Expr a
fromJust = liftDecorated Maybe.fromJust "fromJust" Nothing

-- | Apply a function to the values that are present in an optional
-- expression; 'Nothing' stays 'Nothing' (element-wise 'fmap').
whenPresent ::
    forall a b.
    (Columnable a, Columnable b) => (a -> b) -> Expr (Maybe a) -> Expr (Maybe b)
whenPresent f = liftDecorated (fmap f) "whenPresent" Nothing

-- | Combine two optional expressions element-wise: the result is
-- @Just (f x y)@ when both sides are 'Just', and 'Nothing' otherwise.
whenBothPresent ::
    forall a b c.
    (Columnable a, Columnable b, Columnable c) =>
    (a -> b -> c) -> Expr (Maybe a) -> Expr (Maybe b) -> Expr (Maybe c)
whenBothPresent f =
    lift2Decorated (\l r -> f <$> l <*> r) "whenBothPresent" Nothing False 0

-- | Translate values through an association list.
--
-- Each value is looked up in @mapping@ with 'lookup': the result is 'Just'
-- the paired value for the first matching key and 'Nothing' when no key
-- matches. The operation's name embeds @show mapping@.
recode ::
    forall a b.
    (Columnable a, Columnable b) => [(a, b)] -> Expr a -> Expr (Maybe b)
recode mapping =
    Unary
        ( MkUnaryOp
            { unaryFn = (`lookup` mapping)
            , unaryName = "recode " <> T.pack (show mapping)
            , unarySymbol = Nothing
            }
        )

-- | Build a chain of conditionals from a list of @(predicate, value)@ pairs.
--
-- The predicates are applied to @expr@ in list order; the first one that
-- holds selects its paired value (as a literal). When the list is exhausted
-- the @fallback@ expression is used.
recodeWithCondition ::
    forall a b.
    (Columnable a, Columnable b) =>
    Expr b -> [(Expr a -> Expr Bool, b)] -> Expr a -> Expr b
recodeWithCondition fallback [] _ = fallback
recodeWithCondition fallback ((cond, value) : rest) expr =
    ifThenElse
        (cond expr)
        (lit value)
        (recodeWithCondition fallback rest expr)

-- | Translate values through an association list, substituting the default
-- @d@ for any value that has no entry in @mapping@.
--
-- The operation's name embeds @show d@ and @show mapping@.
recodeWithDefault ::
    forall a b.
    (Columnable a, Columnable b) => b -> [(a, b)] -> Expr a -> Expr b
recodeWithDefault d mapping =
    Unary
        ( MkUnaryOp
            { unaryFn = Maybe.fromMaybe d . (`lookup` mapping)
            , unaryName =
                "recodeWithDefault " <> T.pack (show d) <> " " <> T.pack (show mapping)
            , unarySymbol = Nothing
            }
        )

-- | First element of each list value, or 'Nothing' for an empty list.
firstOrNothing :: (Columnable a) => Expr [a] -> Expr (Maybe a)
firstOrNothing = liftDecorated Maybe.listToMaybe "firstOrNothing" Nothing

-- | Last element of each list value, or 'Nothing' for an empty list.
lastOrNothing :: (Columnable a) => Expr [a] -> Expr (Maybe a)
lastOrNothing =
    -- Reversing first lets the total 'Maybe.listToMaybe' pick the last element.
    liftDecorated (Maybe.listToMaybe . reverse) "lastOrNothing" Nothing

-- | Split every text value on a delimiter using 'T.splitOn'.
--
-- 'T.splitOn' raises an error when the delimiter is empty.
splitOn :: T.Text -> Expr T.Text -> Expr [T.Text]
splitOn delim = liftDecorated (T.splitOn delim) "splitOn" Nothing

-- | First match of a regular expression in each text value.
--
-- The value is matched with '=~'; an empty match result is reported as
-- 'Nothing', anything else as 'Just' the matched text. The operation's name
-- embeds @show regex@.
match :: T.Text -> Expr T.Text -> Expr (Maybe T.Text)
match regex =
    liftDecorated
        (nonEmptyMatch . (=~ regex))
        ("match " <> T.pack (show regex))
        Nothing
  where
    -- An empty result text is treated as "no match".
    nonEmptyMatch r
        | T.null r = Nothing
        | otherwise = Just r

-- | All matches of a regular expression in each text value, obtained from
-- '=~' via 'getAllTextMatches'. The operation's name embeds @show regex@.
matchAll :: T.Text -> Expr T.Text -> Expr [T.Text]
matchAll regex =
    liftDecorated
        (getAllTextMatches . (=~ regex))
        ("matchAll " <> T.pack (show regex))
        Nothing

-- | Parse every text value as a time value using a @Data.Time@ format string.
--
-- Parsing uses 'parseTimeM' with 'defaultTimeLocale' and with its first
-- argument set to 'True' (surrounding whitespace is accepted). Values that
-- fail to parse become 'Nothing'.
parseDate ::
    (ParseTime t, Columnable t) => T.Text -> Expr T.Text -> Expr (Maybe t)
parseDate format =
    liftDecorated
        (parseTimeM True defaultTimeLocale (T.unpack format) . T.unpack)
        ("parseDate " <> format)
        Nothing

-- | Number of days from the second date to the first, element-wise
-- (@diffDays d1 d2@, i.e. positive when the first date is later).
daysBetween :: Expr Day -> Expr Day -> Expr Int
daysBetween =
    lift2Decorated
        -- 'diffDays' returns an 'Integer'; narrow it to the 'Int' result type.
        (\d1 d2 -> fromIntegral (diffDays d1 d2))
        "daysBetween"
        Nothing
        True
        2

-- | Monadic bind lifted to expressions: each monadic value in the
-- expression is passed through @(>>= f)@.
bind ::
    forall a b m.
    (Columnable a, Columnable (m a), Monad m, Columnable b, Columnable (m b)) =>
    (a -> m b) -> Expr (m a) -> Expr (m b)
bind f = liftDecorated (>>= f) "bind" Nothing

-- See Section 2.4 of the Haskell Report https://www.haskell.org/definition/haskell2010.pdf

-- | 'True' when the text is exactly one of the reserved words listed below.
isReservedId :: T.Text -> Bool
isReservedId t = T.unpack t `S.member` reservedIds
  where
    reservedIds :: S.Set String
    reservedIds =
        S.fromList
            [ "case"
            , "class"
            , "data"
            , "default"
            , "deriving"
            , "do"
            , "else"
            , "foreign"
            , "if"
            , "import"
            , "in"
            , "infix"
            , "infixl"
            , "infixr"
            , "instance"
            , "let"
            , "module"
            , "newtype"
            , "of"
            , "then"
            , "type"
            , "where"
            ]

-- | 'True' when the text is non-empty and its first character is a
-- lowercase letter. Only the first character is inspected.
isVarId :: T.Text -> Bool
isVarId t = case T.uncons t of
    -- We might want to check  c == '_' || Char.isLower c
    -- since the haskell report considers '_' a lowercase character
    -- However, to prevent an edge case where a user may have a
    -- "Name" and an "_Name_" in the same scope, wherein we'd end up
    -- with duplicate "_Name_"s, we eschew the check for '_' here.
    Just (c, _) -> Char.isLower c && Char.isAlpha c
    Nothing -> False

-- | Despite its name, this predicate is 'True' when the text can /not/ be
-- used as-is as a variable name: either it does not start like a variable
-- identifier (see 'isVarId') or it is a reserved word (see 'isReservedId').
--
-- 'sanitize' relies on this polarity to decide when a name must be wrapped
-- in underscores.
isHaskellIdentifier :: T.Text -> Bool
isHaskellIdentifier t = Prelude.not (isVarId t) || isReservedId t

-- | Turn arbitrary text (e.g. a column name) into a usable variable name.
--
-- * Text that already starts with a lowercase letter, is not a reserved
--   word and is entirely alphanumeric is returned unchanged.
-- * Otherwise bracket characters are removed, uppercase letters are
--   lowercased and every other non-alphanumeric character becomes @_@.
-- * If the cleaned text is still rejected by 'isHaskellIdentifier' (for
--   example it starts with a digit or is a reserved word) it is wrapped in
--   underscores.
sanitize :: T.Text -> T.Text
sanitize t
    | isValid = t
    | isHaskellIdentifier t' = "_" <> t' <> "_"
    | otherwise = t'
  where
    isValid =
        Prelude.not (isHaskellIdentifier t)
            && isVarId t
            && T.all Char.isAlphaNum t
    -- Brackets are dropped first so they do not turn into underscores.
    t' = T.map replaceInvalidCharacters . T.filter (Prelude.not . parentheses) $ t
    replaceInvalidCharacters c
        | Char.isUpper c = Char.toLower c
        -- '-' is punctuation and therefore also becomes a '_'
        | Char.isSpace c || Char.isPunctuation c || Char.isSymbol c = '_'
        | Char.isAlphaNum c = c -- Blanket condition
        | otherwise = '_' -- If we're unsure we'll default to an underscore
    parentheses c = case c of
        '(' -> True
        ')' -> True
        '{' -> True
        '}' -> True
        '[' -> True
        ']' -> True
        _ -> False

-- | Resolve a type, given as a list of words, to a Template Haskell 'Type'.
--
-- * A single word is trimmed with 'normalize'. If it is bracketed
--   (@[T]@) the inner text is resolved and wrapped in 'ListT'. The
--   spellings @Text@, @Data.Text.Text@ and @T.Text@ resolve to 'T.Text';
--   any other word is resolved with 'lookupTypeName'.
-- * Two words are a type constructor applied to one argument, three words
--   a type constructor applied to two arguments.
--
-- Fails in 'Q' for an empty list, an unknown type name, or more than three
-- words.
typeFromString :: [String] -> Q Type
typeFromString [] = fail "No type specified"
typeFromString [t0] = do
    let t = normalize t0
    case stripBrackets t of
        Just inner -> AppT ListT <$> typeFromString [inner]
        Nothing
            | t == "Text" || t == "Data.Text.Text" || t == "T.Text" ->
                pure (ConT ''T.Text)
            | otherwise -> do
                m <- lookupTypeName t
                case m of
                    Just name -> pure (ConT name)
                    -- The error reports the word as it was originally given.
                    Nothing -> fail $ "Unsupported type: " ++ t0
typeFromString [tycon, t1] =
    AppT <$> typeFromString [tycon] <*> typeFromString [t1]
typeFromString [tycon, t1, t2] =
    (\outer a b -> AppT (AppT outer a) b)
        <$> typeFromString [tycon]
        <*> typeFromString [t1]
        <*> typeFromString [t2]
typeFromString s = fail $ "Unsupported types: " ++ unwords s

-- | Strip leading and trailing space characters (@' '@ only; tabs and
-- newlines are kept) from a string.
normalize :: String -> String
normalize = dropSpaces . reverse . dropSpaces . reverse
  where
    dropSpaces = dropWhile (== ' ')

-- | Remove one pair of enclosing square brackets.
--
-- Returns 'Just' the text between the brackets when the string starts with
-- @[@ and ends with a separate @]@, and 'Nothing' otherwise. Only the
-- outermost pair is removed.
stripBrackets :: String -> Maybe String
stripBrackets s =
    case s of
        ('[' : rest)
            -- Matching on the reversed tail checks the last character and
            -- drops it without resorting to partial 'last' / 'init'.
            | (']' : revInner) <- reverse rest -> Just (reverse revInner)
        _ -> Nothing

-- | Read a CSV file at compile time and pass the resulting 'DataFrame' to
-- 'declareColumns'.
--
-- The file is read with 'CSV.defaultReadOptions' except that
-- @numColumns@ is set to @Just 100@.
-- NOTE(review): the exact meaning of @numColumns@ is defined by the CSV
-- reader — confirm that 100 is the intended limit.
declareColumnsFromCsvFile :: String -> DecsQ
declareColumnsFromCsvFile path = do
    df <-
        liftIO
            (CSV.readSeparated (CSV.defaultReadOptions{CSV.numColumns = Just 100}) path)
    declareColumns df

-- | Read Parquet metadata at compile time and pass an empty 'DataFrame'
-- with the corresponding columns to 'declareColumns'.
--
-- @path@ may be a directory (every @*.parquet@ entry in it is used) or a
-- glob pattern / file path. Directories among the matches are skipped. Only
-- file metadata is read via 'Parquet.readMetadataFromPath'.
--
-- A column is treated as nullable when any column chunk in any row group of
-- any matched file reports a null count greater than zero; such columns are
-- identified by the last component of their schema path.
declareColumnsFromParquetFile :: String -> DecsQ
declareColumnsFromParquetFile path = do
    isDir <- liftIO $ doesDirectoryExist path
    let pat = if isDir then path </> "*.parquet" else path
    matches <- liftIO $ glob pat
    files <- liftIO $ filterM (fmap Prelude.not . doesDirectoryExist) matches
    metas <- liftIO $ mapM (fmap fst . Parquet.readMetadataFromPath) files
    let nullableCols :: S.Set T.Text
        nullableCols =
            S.fromList
                [ T.pack leafName
                | meta <- metas
                , rg <- rowGroups meta
                , cc <- rowGroupColumns rg
                , let cm = columnMetaData cc
                , -- Chunks with an empty schema path fail the pattern and are
                  -- skipped; otherwise the last path component is taken.
                  leafName : _ <- [reverse (columnPathInSchema cm)]
                , columnNullCount (columnStatistics cm) > 0
                ]
    -- Merge the (empty) frames of all files, left to right.
    let df =
            L.foldl'
                (\acc meta -> acc <> schemaToEmptyDataFrame nullableCols (schema meta))
                DataFrame.Internal.DataFrame.empty
                metas
    declareColumns df

-- | Build an empty 'DataFrame' from a Parquet schema.
--
-- Only leaf schema elements (those with zero children) become columns; each
-- one is converted with 'schemaElemToColumn'.
schemaToEmptyDataFrame :: S.Set T.Text -> [SchemaElement] -> DataFrame
schemaToEmptyDataFrame nullableCols elems =
    let leafElems = filter (\e -> numChildren e == 0) elems
     in fromNamedColumns (map (schemaElemToColumn nullableCols) leafElems)

-- | Convert one schema element into a named, empty column.
--
-- The column is built with 'emptyNullableColumnForType' when the element's
-- name is a member of @nullableCols@, and with 'emptyColumnForType'
-- otherwise.
schemaElemToColumn :: S.Set T.Text -> SchemaElement -> (T.Text, Column)
schemaElemToColumn nullableCols schemaElem =
    let name = elementName schemaElem
        isNull = name `S.member` nullableCols
        col =
            if isNull
                then emptyNullableColumnForType (elementType schemaElem)
                else emptyColumnForType (elementType schemaElem)
     in (name, col)

-- | Empty, non-nullable column whose element type corresponds to a
-- Parquet/Thrift 'TType'.
--
-- Calls 'error' for any 'TType' not listed below.
emptyColumnForType :: TType -> Column
emptyColumnForType = \case
    BOOL -> fromList @Bool []
    BYTE -> fromList @Word8 []
    I16 -> fromList @Int16 []
    I32 -> fromList @Int32 []
    I64 -> fromList @Int64 []
    -- NOTE(review): I96 is represented as Int64 here — confirm this is intended.
    I96 -> fromList @Int64 []
    FLOAT -> fromList @Float []
    DOUBLE -> fromList @Double []
    STRING -> fromList @T.Text []
    other -> error $ "Unsupported parquet type for column: " <> show other

-- | Empty column of 'Maybe' values whose element type corresponds to a
-- Parquet/Thrift 'TType'. Mirrors 'emptyColumnForType' case for case.
--
-- Calls 'error' for any 'TType' not listed below.
emptyNullableColumnForType :: TType -> Column
emptyNullableColumnForType = \case
    BOOL -> fromList @(Maybe Bool) []
    BYTE -> fromList @(Maybe Word8) []
    I16 -> fromList @(Maybe Int16) []
    I32 -> fromList @(Maybe Int32) []
    I64 -> fromList @(Maybe Int64) []
    -- NOTE(review): I96 is represented as Int64 here — confirm this is intended.
    I96 -> fromList @(Maybe Int64) []
    FLOAT -> fromList @(Maybe Float) []
    DOUBLE -> fromList @(Maybe Double) []
    STRING -> fromList @(Maybe T.Text) []
    other -> error $ "Unsupported parquet type for column: " <> show other

-- | Read a CSV file at compile time with caller-supplied read options and
-- pass the resulting 'DataFrame' to 'declareColumns'.
declareColumnsFromCsvWithOpts :: CSV.ReadOptions -> String -> DecsQ
declareColumnsFromCsvWithOpts opts path = do
    df <- liftIO (CSV.readSeparated opts path)
    declareColumns df

{- | Generate column declarations for every column of the frame, without any
name prefix. Equivalent to calling @declareColumnsWithPrefix'@ with 'Nothing'.
-}
declareColumns :: DataFrame -> DecsQ
declareColumns = declareColumnsWithPrefix' Nothing

{- | Generate column declarations for every column of the frame, using the
given text as a prefix for each generated name. Equivalent to calling
@declareColumnsWithPrefix'@ with the prefix wrapped in 'Just'.
-}
declareColumnsWithPrefix :: T.Text -> DataFrame -> DecsQ
declareColumnsWithPrefix prefix = declareColumnsWithPrefix' (Just prefix)

{- | Generate, for every column of the frame, a signature and a value binding
of the form

> <name> :: Expr <type>
> <name> = col "<raw column name>"

Columns are visited in the order of their index in 'columnIndices'. The
generated @<name>@ is the 'sanitize'd column name; when a prefix is supplied,
the 'sanitize'd text of @prefix <> "_"@ is prepended to it. The @<type>@ is
obtained by splitting the column's 'columnTypeString' into words and passing
them to 'typeFromString'.
-}
declareColumnsWithPrefix' :: Maybe T.Text -> DataFrame -> DecsQ
declareColumnsWithPrefix' prefix df = concat <$> forM specs declare
  where
    -- Column names ordered by their position in the frame.
    names :: [T.Text]
    names = map fst . L.sortOn snd . M.toList $ columnIndices df

    -- Text prepended to every generated identifier (empty without a prefix).
    prefixText :: T.Text
    prefixText = maybe "" (sanitize . (<> "_")) prefix

    -- (raw column name, generated identifier, column type as a string).
    specs :: [(T.Text, T.Text, String)]
    specs =
        [ (name, prefixText <> sanitize name, columnTypeString (unsafeGetColumn name df))
        | name <- names
        ]

    -- Emit the signature and value declaration for a single column.
    declare :: (T.Text, T.Text, String) -> DecsQ
    declare (raw, nm, tyStr) = do
        ty <- typeFromString (words tyStr)
        -- Debug aid: echoes the generated signature via 'trace' when this
        -- step of the splice is evaluated.
        trace (T.unpack (nm <> " :: Expr " <> T.pack tyStr)) (pure ())
        let n = mkName (T.unpack nm)
        sig <- sigD n [t|Expr $(pure ty)|]
        -- The binding refers to the column by its raw (unsanitized) name.
        val <- valD (varP n) (normalB [|col $(TH.lift raw)|]) []
        pure [sig, val]