{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Operations.Transformations where
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Vector as V
import Control.Exception (throw)
import Data.Maybe
import DataFrame.Errors (DataFrameException (..), TypeErrorContext (..))
import DataFrame.Internal.Column (
Column (..),
Columnable,
TypedColumn (..),
ifoldrColumn,
imapColumn,
mapColumn,
)
import DataFrame.Internal.DataFrame (DataFrame (..), getColumn)
import DataFrame.Internal.Expression
import DataFrame.Operations.Core
{- | Apply a function to the column called @columnName@ and return the
resulting 'DataFrame'.

This is the throwing counterpart of 'safeApply': the work is delegated to
'safeApply' and a 'Left' result is turned into a thrown exception.

* A 'TypeMismatchException' is re-thrown with its @callingFunctionName@
  field set to @Just "apply"@.
* Any other 'DataFrameException' is re-thrown unchanged.
-}
apply ::
    forall b c.
    (Columnable b, Columnable c) =>
    (b -> c) ->
    T.Text ->
    DataFrame ->
    DataFrame
apply f columnName d = case safeApply f columnName d of
    -- Relabel type errors so they name this entry point.
    Left (TypeMismatchException context) ->
        throw $
            TypeMismatchException (context{callingFunctionName = Just "apply"})
    Left exception -> throw exception
    Right df -> df
{- | Apply a function to the column called @columnName@, reporting failures
as a 'Left' value instead of throwing.

The column is looked up with 'getColumn':

* If it is absent, the result is a 'ColumnNotFoundException' carrying the
  requested name, the label @"apply"@ and the keys of 'columnIndices'.
* Otherwise the function is run over the column with 'mapColumn' (whose own
  failure is propagated as 'Left'), and the mapped column is passed to
  'insertColumn' under the same name.
-}
safeApply ::
    forall b c.
    (Columnable b, Columnable c) =>
    (b -> c) ->
    T.Text ->
    DataFrame ->
    Either DataFrameException DataFrame
safeApply f columnName d = case getColumn columnName d of
    Nothing ->
        Left $
            ColumnNotFoundException columnName "apply" (M.keys $ columnIndices d)
    Just column -> do
        column' <- mapColumn f column
        pure $ insertColumn columnName column' d
{- | Evaluate an expression against the frame and store the result as the
column called @name@.

The expression is evaluated with 'interpret' at type @a@. A 'Left' result is
thrown as an exception; on success the column wrapped in 'TColumn' is passed
to 'insertColumn'.
-}
derive :: forall a. (Columnable a) => T.Text -> Expr a -> DataFrame -> DataFrame
derive name expr df = case interpret @a df expr of
    Left e -> throw e
    Right (TColumn value) -> insertColumn name value df
{- | Apply the same function to each of the named columns.

Implemented as a strict left fold of 'apply' over @names@, starting from
@df@, so columns are processed in list order and any exception thrown by
'apply' propagates to the caller.
-}
applyMany ::
    (Columnable b, Columnable c) =>
    (b -> c) ->
    [T.Text] ->
    DataFrame ->
    DataFrame
applyMany f names df = L.foldl' (flip (apply f)) df names
-- | 'apply' with the function's input type fixed to 'Int'.
applyInt ::
    (Columnable b) =>
    (Int -> b) ->
    T.Text ->
    DataFrame ->
    DataFrame
applyInt = apply
-- | 'apply' with the function's input type fixed to 'Double'.
applyDouble ::
    (Columnable b) =>
    (Double -> b) ->
    T.Text ->
    DataFrame ->
    DataFrame
applyDouble = apply
{- | Apply @f@ to the values of @columnName@ at every row where @condition@
holds for the value in @filterColumnName@.

Steps:

1. Look up @filterColumnName@ with 'getColumn'; if it is absent a
   'ColumnNotFoundException' labelled @"applyWhere"@ is thrown.
2. Fold over the filter column with 'ifoldrColumn', collecting the indices
   whose value satisfies @condition@. A 'Left' from the fold is thrown.
3. If no index matched, @df@ is returned as is (the target column is not
   looked up in that case). Otherwise 'applyAtIndex' is folded over the
   collected indices.
-}
applyWhere ::
    forall a b.
    (Columnable a, Columnable b) =>
    (a -> Bool) ->
    T.Text ->
    (b -> b) ->
    T.Text ->
    DataFrame ->
    DataFrame
applyWhere condition filterColumnName f columnName df =
    case getColumn filterColumnName df of
        Nothing ->
            throw $
                ColumnNotFoundException
                    filterColumnName
                    "applyWhere"
                    (M.keys $ columnIndices df)
        Just column ->
            case ifoldrColumn collectMatch V.empty column of
                Left e -> throw e
                Right indexes
                    | V.null indexes -> df
                    -- NOTE(review): 'applyAtIndex' is invoked once per
                    -- matching index, each call re-reading the target column.
                    | otherwise ->
                        L.foldl' (\d i -> applyAtIndex i f columnName d) df indexes
  where
    -- Prepend the row index when the filter value satisfies the predicate.
    collectMatch i val acc
        | condition val = V.cons i acc
        | otherwise = acc
{- | Apply @f@ to the single value at position @i@ of the column called
@columnName@, leaving every other value as it was.

* If the column is absent, a 'ColumnNotFoundException' labelled
  @"applyAtIndex"@ is thrown.
* The column is rewritten with 'imapColumn'; a 'Left' result is thrown.
* On success the rewritten column is passed to 'insertColumn' under the same
  name.

An index that matches no position leaves all values untouched, because the
mapping function only changes the element whose index equals @i@.
-}
applyAtIndex ::
    forall a.
    (Columnable a) =>
    Int ->
    (a -> a) ->
    T.Text ->
    DataFrame ->
    DataFrame
applyAtIndex i f columnName df = case getColumn columnName df of
    Nothing ->
        throw $
            ColumnNotFoundException
                columnName
                "applyAtIndex"
                (M.keys $ columnIndices df)
    Just column ->
        case imapColumn (\index value -> if index == i then f value else value) column of
            Left e -> throw e
            Right column' -> insertColumn columnName column' df
{- | Replace the missing entries of an optional column with @value@.

* If @columnName@ is absent, a 'ColumnNotFoundException' labelled
  @"impute"@ is thrown.
* If the column is an 'OptionalColumn', @fromMaybe value@ is applied to it
  through 'safeApply'. A 'TypeMismatchException' is re-thrown with its
  @callingFunctionName@ set to @Just "impute"@; any other exception is
  re-thrown unchanged.
* Any other column constructor results in a call to 'error'.
-}
impute ::
    forall b.
    (Columnable b) =>
    T.Text ->
    b ->
    DataFrame ->
    DataFrame
impute columnName value df = case getColumn columnName df of
    Nothing ->
        throw $
            ColumnNotFoundException columnName "impute" (M.keys $ columnIndices df)
    Just (OptionalColumn _) ->
        case safeApply (fromMaybe value) columnName df of
            -- Relabel type errors so they name this entry point.
            Left (TypeMismatchException context) ->
                throw $
                    TypeMismatchException (context{callingFunctionName = Just "impute"})
            Left exception -> throw exception
            Right res -> res
    _ -> error $ "Cannot impute to a non-Empty column: " ++ T.unpack columnName