{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE FlexibleContexts #-}
module DataFrame.Operations.Transformations where
import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Map as M
import qualified Data.Vector.Generic as VG
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Control.Exception (throw)
import DataFrame.Errors (DataFrameException(..), TypeErrorContext(..))
import DataFrame.Internal.Column (Column(..), columnTypeString, imapColumn, ifoldrColumn, TypedColumn (TColumn), Columnable, mapColumn, unwrapTypedColumn)
import DataFrame.Internal.DataFrame (DataFrame(..), getColumn)
import DataFrame.Internal.Expression
import DataFrame.Internal.Row (mkRowFromArgs, RowValue, toRowValue)
import DataFrame.Operations.Core
import Data.Maybe
import Type.Reflection (typeRep, typeOf, TypeRep)
-- | Apply a function to every value of the named column.
--
-- This is the throwing counterpart of 'safeApply': a 'Left' result from
-- 'safeApply' is raised with 'throw', a 'Right' result is returned as is.
apply ::
  forall b c.
  (Columnable b, Columnable c) =>
  -- | function applied to each value of the column
  (b -> c) ->
  -- | name of the column to transform
  T.Text ->
  -- | data frame to transform
  DataFrame ->
  DataFrame
apply f columnName d = either throw id (safeApply f columnName d)
-- | Apply a function to every value of the named column, reporting failures
-- as a 'Left' instead of throwing.
--
-- * 'ColumnNotFoundException' when 'getColumn' finds no column with that name.
-- * 'TypeMismatchException' when 'mapColumn' returns 'Nothing' for the column;
--   the context records the requested type @b@ and the column's type string.
--
-- On success the mapped column is put back under the same name with
-- 'insertColumn'. Errors are tagged with the caller name @"apply"@.
safeApply ::
  forall b c.
  (Columnable b, Columnable c) =>
  -- | function applied to each value of the column
  (b -> c) ->
  -- | name of the column to transform
  T.Text ->
  -- | data frame to transform
  DataFrame ->
  Either DataFrameException DataFrame
safeApply f columnName d = do
  column <- maybe (Left missingColumn) Right (getColumn columnName d)
  mapped <- maybe (Left (typeMismatch column)) Right (mapColumn f column)
  pure (insertColumn columnName mapped d)
  where
    -- Error for an unknown column; carries the names of the existing columns.
    missingColumn :: DataFrameException
    missingColumn =
      ColumnNotFoundException columnName "apply" (M.keys (columnIndices d))

    -- Error for a column whose values cannot be mapped at type @b@.
    typeMismatch :: Column -> DataFrameException
    typeMismatch column =
      TypeMismatchException
        ( MkTypeErrorContext
            { userType = Right (typeRep @b)
            , expectedType = Left (columnTypeString column) :: Either String (TypeRep ())
            , errorColumnName = Just (T.unpack columnName)
            , callingFunctionName = Just "apply"
            }
        )
-- | Evaluate an expression against the data frame with 'interpret' and insert
-- the resulting column under the given name.
derive :: forall a. Columnable a => T.Text -> Expr a -> DataFrame -> DataFrame
derive name expr df = insertColumn name (unwrapTypedColumn computed) df
  where
    -- Result of interpreting the expression against the input frame.
    computed :: TypedColumn a
    computed = interpret @a df expr
-- | Apply the same function to several columns, one after another.
--
-- The names are processed left to right with a strict fold; each step calls
-- 'apply' on the frame produced by the previous step.
applyMany ::
  (Columnable b, Columnable c) =>
  -- | function applied to each value of every listed column
  (b -> c) ->
  -- | names of the columns to transform
  [T.Text] ->
  -- | data frame to transform
  DataFrame ->
  DataFrame
applyMany f names df = L.foldl' step df names
  where
    step frame name = apply f name frame
-- | 'apply' with the input type fixed to 'Int'.
applyInt ::
  (Columnable b) =>
  -- | function applied to each 'Int' value of the column
  (Int -> b) ->
  -- | name of the column to transform
  T.Text ->
  -- | data frame to transform
  DataFrame ->
  DataFrame
applyInt = apply @Int
-- | 'apply' with the input type fixed to 'Double'.
applyDouble ::
  (Columnable b) =>
  -- | function applied to each 'Double' value of the column
  (Double -> b) ->
  -- | name of the column to transform
  T.Text ->
  -- | data frame to transform
  DataFrame ->
  DataFrame
applyDouble = apply @Double
-- | Apply @f@ to the values of @columnName@ at every row index where
-- @condition@ holds for the value of @filterColumnName@.
--
-- * Throws 'ColumnNotFoundException' when the filter column does not exist.
-- * Throws 'TypeMismatchException' when 'ifoldrColumn' returns 'Nothing' for
--   the filter column at type @a@. The error names the filter column, since
--   that is the column whose type string is reported as the expected type.
-- * When no index matches, the frame is returned unchanged and the target
--   column is never looked up.
-- * Otherwise each matching index is updated through 'applyAtIndex', so any
--   exception it throws for the target column propagates from here.
applyWhere ::
  forall a b.
  (Columnable a, Columnable b) =>
  -- | predicate tested against each value of the filter column
  (a -> Bool) ->
  -- | name of the column the predicate is evaluated on
  T.Text ->
  -- | function applied to the target column at matching indices
  (b -> b) ->
  -- | name of the column to update
  T.Text ->
  -- | data frame to transform
  DataFrame ->
  DataFrame
applyWhere condition filterColumnName f columnName df =
  case getColumn filterColumnName df of
    Nothing ->
      throw $
        ColumnNotFoundException
          filterColumnName
          "applyWhere"
          (map fst $ M.toList $ columnIndices df)
    Just column ->
      case ifoldrColumn collectMatch V.empty column of
        Nothing ->
          throw $
            TypeMismatchException
              ( MkTypeErrorContext
                  { userType = Right $ typeRep @a
                  , expectedType = Left (columnTypeString column) :: Either String (TypeRep ())
                  , -- The mismatch concerns the filter column, not the target one.
                    errorColumnName = Just (T.unpack filterColumnName)
                  , callingFunctionName = Just "applyWhere"
                  }
              )
        Just indexes
          | V.null indexes -> df
          | otherwise ->
              L.foldl' (\d i -> applyAtIndex i f columnName d) df indexes
  where
    -- Right-fold step: keep the index of every value satisfying the predicate.
    collectMatch :: Int -> a -> V.Vector Int -> V.Vector Int
    collectMatch i val acc
      | condition val = V.cons i acc
      | otherwise = acc
-- | Apply a function to the single value at index @i@ of the named column,
-- leaving every other value as it was.
--
-- * Throws 'ColumnNotFoundException' when the column does not exist.
-- * Throws 'TypeMismatchException' when 'imapColumn' returns 'Nothing' for
--   the column at type @a@.
--
-- The rewritten column is put back under the same name with 'insertColumn'.
applyAtIndex ::
  forall a.
  (Columnable a) =>
  -- | index of the value to update
  Int ->
  -- | function applied to the value at that index
  (a -> a) ->
  -- | name of the column to update
  T.Text ->
  -- | data frame to transform
  DataFrame ->
  DataFrame
applyAtIndex i f columnName df =
  case getColumn columnName df of
    Nothing -> throw missingColumn
    Just column ->
      maybe
        (throw (typeMismatch column))
        (\updated -> insertColumn columnName updated df)
        (imapColumn updateAt column)
  where
    -- Transform only the value sitting at the requested index.
    updateAt :: Int -> a -> a
    updateAt position current
      | position == i = f current
      | otherwise = current

    -- Error for an unknown column; carries the names of the existing columns.
    missingColumn :: DataFrameException
    missingColumn =
      ColumnNotFoundException columnName "applyAtIndex" (M.keys (columnIndices df))

    -- Error for a column whose values cannot be mapped at type @a@.
    typeMismatch :: Column -> DataFrameException
    typeMismatch column =
      TypeMismatchException
        ( MkTypeErrorContext
            { userType = Right (typeRep @a)
            , expectedType = Left (columnTypeString column) :: Either String (TypeRep ())
            , errorColumnName = Just (T.unpack columnName)
            , callingFunctionName = Just "applyAtIndex"
            }
        )
-- | Replace the 'Nothing' entries of an optional column with a default value.
--
-- * Throws 'ColumnNotFoundException' when the column does not exist.
-- * For an 'OptionalColumn', runs 'safeApply' with @'fromMaybe' value@. A
--   'TypeMismatchException' from that call is rethrown with its calling
--   function name set to @"impute"@; any other exception is rethrown as is.
-- * For any other kind of column, calls 'error'.
impute ::
  forall b.
  (Columnable b) =>
  -- | name of the column to impute
  T.Text ->
  -- | value substituted for 'Nothing'
  b ->
  -- | data frame to transform
  DataFrame ->
  DataFrame
impute columnName value df =
  case getColumn columnName df of
    Nothing ->
      throw (ColumnNotFoundException columnName "impute" (M.keys (columnIndices df)))
    Just (OptionalColumn _) ->
      either (throw . retag) id (safeApply (fromMaybe value) columnName df)
    _ -> error "Cannot impute to a non-Empty column"
  where
    -- Attribute type mismatches to this function rather than to the helper.
    retag :: DataFrameException -> DataFrameException
    retag (TypeMismatchException context) =
      TypeMismatchException (context {callingFunctionName = Just "impute"})
    retag other = other