{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Transformations where

import qualified Data.IntSet as IS
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V

import Control.Exception (throw)
import Data.Maybe
import DataFrame.Errors (DataFrameException (..), TypeErrorContext (..))
import DataFrame.Internal.Column (
    Column (..),
    Columnable,
    TypedColumn (..),
    ifoldrColumn,
    imapColumn,
    mapColumn,
 )
import DataFrame.Internal.DataFrame (DataFrame (..), getColumn)
import DataFrame.Internal.Expression
import DataFrame.Operations.Core

{- | O(k) Apply a function to a given column in a dataframe.

This is the throwing counterpart of 'safeApply': every 'DataFrameException'
that 'safeApply' returns as a @Left@ is raised with 'throw'. A
'TypeMismatchException' is re-tagged with @"apply"@ as its calling function
name before being thrown; any other exception is thrown unchanged.
-}
apply ::
    forall b c.
    (Columnable b, Columnable c) =>
    -- | function to apply
    (b -> c) ->
    -- | Column name
    T.Text ->
    -- | DataFrame to apply operation to
    DataFrame ->
    DataFrame
apply f columnName d = case safeApply f columnName d of
    -- Record this function as the caller in the error context.
    Left (TypeMismatchException context) ->
        throw $ TypeMismatchException (context{callingFunctionName = Just "apply"})
    Left exception -> throw exception
    Right df -> df

{- | O(k) Safe version of the apply function. Returns (instead of throwing) the error.

* If the named column does not exist, the result is
  @Left ('ColumnNotFoundException' ...)@ carrying the requested name and the
  names of the columns that are present.
* If 'mapColumn' fails on the column, its error is returned as-is.
* Otherwise the mapped column is inserted back under the same name.
-}
safeApply ::
    forall b c.
    (Columnable b, Columnable c) =>
    -- | function to apply
    (b -> c) ->
    -- | Column name
    T.Text ->
    -- | DataFrame to apply operation to
    DataFrame ->
    Either DataFrameException DataFrame
safeApply f columnName d = case getColumn columnName d of
    Nothing ->
        Left $ ColumnNotFoundException columnName "apply" (M.keys $ columnIndices d)
    Just column -> do
        -- Short-circuits with the Left from 'mapColumn' on failure.
        column' <- mapColumn f column
        pure $ insertColumn columnName column' d

{- | O(k) Apply a function to a combination of columns in a dataframe and
add the result into `alias` column.

The expression is evaluated against the dataframe with 'interpret'. If
interpretation fails, the resulting 'DataFrameException' is thrown;
otherwise the computed column is inserted under the given name.
-}
derive :: forall a. (Columnable a) => T.Text -> Expr a -> DataFrame -> DataFrame
derive name expr df = case interpret @a df expr of
    Left e -> throw e
    Right (TColumn value) -> insertColumn name value df

{- | O(k * n) Apply a function to given column names in a dataframe.

The names are processed left to right with a strict left fold, each step
calling 'apply' on the dataframe produced by the previous step. Because it
delegates to 'apply', it throws under the same conditions as 'apply'.
-}
applyMany ::
    (Columnable b, Columnable c) =>
    (b -> c) ->
    [T.Text] ->
    DataFrame ->
    DataFrame
applyMany f names df = L.foldl' (flip (apply f)) df names

{- | O(k) Convenience function that applies to an int column.

This is 'apply' with the input element type fixed to 'Int'.
-}
applyInt ::
    (Columnable b) =>
    -- | function to apply
    (Int -> b) ->
    -- | Column name
    T.Text ->
    -- | DataFrame to apply operation to
    DataFrame ->
    DataFrame
applyInt = apply

{- | O(k) Convenience function that applies to a double column.

This is 'apply' with the input element type fixed to 'Double'.
-}
applyDouble ::
    (Columnable b) =>
    -- | function to apply
    (Double -> b) ->
    -- | Column name
    T.Text ->
    -- | DataFrame to apply operation to
    DataFrame ->
    DataFrame
applyDouble = apply

{- | Apply a function to a column only if there is another column
value that matches the given criterion.

> applyWhere (<20) "Age" (const "Gen-Z") "Generation" df

The criterion column is scanned once to collect the row indexes whose value
satisfies the condition. If no row matches, the dataframe is returned
unchanged (the target column is not looked up in that case). Otherwise the
target column is rewritten in a single indexed pass and inserted back under
its own name.

Throws a 'ColumnNotFoundException' if the criterion column is missing, or if
the target column is missing while at least one row matches. Any error
returned by 'ifoldrColumn' or 'imapColumn' is thrown as-is.
-}
applyWhere ::
    forall a b.
    (Columnable a, Columnable b) =>
    -- | Filter condition
    (a -> Bool) ->
    -- | Criterion Column
    T.Text ->
    -- | function to apply
    (b -> b) ->
    -- | Column name
    T.Text ->
    -- | DataFrame to apply operation to
    DataFrame ->
    DataFrame
applyWhere condition filterColumnName f columnName df =
    case getColumn filterColumnName df of
        Nothing ->
            throw $
                ColumnNotFoundException
                    filterColumnName
                    "applyWhere"
                    (M.keys $ columnIndices df)
        Just column ->
            case ifoldrColumn
                (\i val acc -> if condition val then V.cons i acc else acc)
                V.empty
                column of
                Left e -> throw e
                Right indexes
                    | V.null indexes -> df
                    | otherwise -> rewriteMatching (IS.fromList (V.toList indexes))
  where
    -- Rewrite the target column once, applying @f@ only at matching rows,
    -- instead of re-mapping and re-inserting the whole column per match.
    rewriteMatching matches = case getColumn columnName df of
        Nothing ->
            throw $
                ColumnNotFoundException
                    columnName
                    "applyWhere"
                    (M.keys $ columnIndices df)
        Just target ->
            case imapColumn
                (\index value -> if IS.member index matches then f value else value)
                target of
                Left e -> throw e
                Right target' -> insertColumn columnName target' df

{- | O(k) Apply a function to the column at a given index.

The named column is mapped with its row indexes: the value whose row index
equals the given index is replaced by the function's result and every other
value is kept. An index that matches no row therefore leaves the values
unchanged.

Throws a 'ColumnNotFoundException' if the column is missing, and throws any
error returned by 'imapColumn' as-is.
-}
applyAtIndex ::
    forall a.
    (Columnable a) =>
    -- | Index
    Int ->
    -- | function to apply
    (a -> a) ->
    -- | Column name
    T.Text ->
    -- | DataFrame to apply operation to
    DataFrame ->
    DataFrame
applyAtIndex i f columnName df = case getColumn columnName df of
    Nothing ->
        throw $
            ColumnNotFoundException columnName "applyAtIndex" (M.keys $ columnIndices df)
    Just column ->
        case imapColumn (\index value -> if index == i then f value else value) column of
            Left e -> throw e
            Right column' -> insertColumn columnName column' df

{- | Replace all instances of `Nothing` in a column with the given value.

Behaviour by case:

* Column missing: throws a 'ColumnNotFoundException'.
* Column is an 'OptionalColumn': every element is passed through
  @'fromMaybe' value@ via 'safeApply'. A 'TypeMismatchException' from that
  step is re-tagged with @"impute"@ as its calling function name before
  being thrown; any other exception is thrown unchanged.
* Any other column: fails with 'error'.
-}
impute ::
    forall b.
    (Columnable b) =>
    T.Text ->
    b ->
    DataFrame ->
    DataFrame
impute columnName value df = case getColumn columnName df of
    Nothing ->
        throw $ ColumnNotFoundException columnName "impute" (M.keys $ columnIndices df)
    Just (OptionalColumn _) -> case safeApply (fromMaybe value) columnName df of
        -- Record this function as the caller in the error context.
        Left (TypeMismatchException context) ->
            throw $ TypeMismatchException (context{callingFunctionName = Just "impute"})
        Left exception -> throw exception
        Right res -> res
    _ -> error $ "Cannot impute to a non-Empty column: " ++ T.unpack columnName