{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}

module DataFrame.Typed.Operations (
    -- * Schema-preserving operations
    filterWhere,
    filter,
    filterBy,
    filterAllJust,
    filterJust,
    filterNothing,
    sortBy,
    take,
    takeLast,
    drop,
    dropLast,
    range,
    cube,
    distinct,
    sample,
    shuffle,

    -- * Schema-modifying operations
    derive,
    impute,
    select,
    exclude,
    rename,
    renameMany,
    insert,
    insertColumn,
    insertVector,
    cloneColumn,
    dropColumn,
    replaceColumn,

    -- * Metadata
    dimensions,
    nRows,
    nColumns,
    columnNames,

    -- * Vertical merge
    append,
) where

import Data.Proxy (Proxy (..))
import qualified Data.Text as T
import qualified Data.Vector as V
import GHC.TypeLits (KnownSymbol, Symbol, symbolVal)
import System.Random (RandomGen)
import Prelude hiding (drop, filter, take)

import qualified DataFrame.Functions as DF
import DataFrame.Internal.Column (Columnable)
import qualified DataFrame.Internal.Column as C
import qualified DataFrame.Operations.Aggregation as DA
import qualified DataFrame.Operations.Core as D
import DataFrame.Operations.Merge ()
import qualified DataFrame.Operations.Permutation as D
import qualified DataFrame.Operations.Subset as D
import qualified DataFrame.Operations.Transformations as D

import DataFrame.Typed.Freeze (unsafeFreeze)
import DataFrame.Typed.Schema
import DataFrame.Typed.Types (TExpr (..), TSortOrder (..), TypedDataFrame (..))
import qualified DataFrame.Typed.Types as T

-------------------------------------------------------------------------------
-- Schema-preserving operations
-------------------------------------------------------------------------------

{- | Filter rows where a boolean expression evaluates to True.
The expression is validated against the schema at compile time
(it is a 'TExpr' indexed by the same @cols@ as the frame).

The schema is unchanged; the work is delegated to 'D.filterWhere'.
-}
filterWhere :: TExpr cols Bool -> TypedDataFrame cols -> TypedDataFrame cols
filterWhere (TExpr expr) (TDF df) = TDF (D.filterWhere expr df)

{- | Filter rows by applying a predicate to a typed expression.

The expression comes first and the predicate second; see 'filterBy' for the
flipped argument order. The schema is unchanged.
-}
filter ::
    (Columnable a) =>
    TExpr cols a -> (a -> Bool) -> TypedDataFrame cols -> TypedDataFrame cols
filter (TExpr expr) pred' (TDF df) = TDF (D.filter expr pred' df)

{- | Filter rows by a predicate on a column expression (flipped argument order).

Same as 'filter' but takes the predicate first. The schema is unchanged.
-}
filterBy ::
    (Columnable a) =>
    (a -> Bool) -> TExpr cols a -> TypedDataFrame cols -> TypedDataFrame cols
filterBy pred' (TExpr expr) (TDF df) = TDF (D.filterBy pred' expr df)

{- | Keep only rows where ALL Optional columns have Just values.
Strips 'Maybe' from all column types in the result schema.

@
df :: TDF '[Column \"x\" (Maybe Double), Column \"y\" Int]
filterAllJust df :: TDF '[Column \"x\" Double, Column \"y\" Int]
@

The result is re-tagged with 'unsafeFreeze' because the schema changes to
@StripAllMaybe cols@.
-}
filterAllJust :: TypedDataFrame cols -> TypedDataFrame (StripAllMaybe cols)
filterAllJust (TDF df) = unsafeFreeze (D.filterAllJust df)

{- | Keep only rows where the named column has Just values.
Strips 'Maybe' from that column's type in the result schema.

The column is chosen with a type application and must be present in the
schema ('AssertPresent'):

@
filterJust \@\"x\" df
@
-}
filterJust ::
    forall name cols.
    ( KnownSymbol name
    , AssertPresent name cols
    ) =>
    TypedDataFrame cols -> TypedDataFrame (StripMaybeAt name cols)
filterJust (TDF df) = unsafeFreeze (D.filterJust colName df)
  where
    -- Runtime name of the column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Keep only rows where the named column has Nothing.
Schema is preserved (column types unchanged, just fewer rows).

The column is chosen with a type application and must be present in the
schema ('AssertPresent').
-}
filterNothing ::
    forall name cols.
    ( KnownSymbol name
    , AssertPresent name cols
    ) =>
    TypedDataFrame cols -> TypedDataFrame cols
filterNothing (TDF df) = TDF (D.filterNothing colName df)
  where
    -- Runtime name of the column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Sort by the given typed sort orders.
Sort orders reference columns that are validated against the schema.

Each 'TSortOrder' is converted to the untyped 'D.SortOrder' and the list is
handed to 'D.sortBy' in the order given. The schema is unchanged.
-}
sortBy :: [TSortOrder cols] -> TypedDataFrame cols -> TypedDataFrame cols
sortBy ords (TDF df) = TDF (D.sortBy (map toUntypedSort ords) df)
  where
    -- Unwrap the typed expression and rebuild the matching untyped order.
    toUntypedSort :: TSortOrder cols -> D.SortOrder
    toUntypedSort (Asc (TExpr e)) = D.Asc e
    toUntypedSort (Desc (TExpr e)) = D.Desc e

-- | Take the first @n@ rows. The schema is unchanged; delegates to 'D.take'.
take :: Int -> TypedDataFrame cols -> TypedDataFrame cols
take n (TDF df) = TDF (D.take n df)

-- | Take the last @n@ rows. The schema is unchanged; delegates to 'D.takeLast'.
takeLast :: Int -> TypedDataFrame cols -> TypedDataFrame cols
takeLast n (TDF df) = TDF (D.takeLast n df)

-- | Drop the first @n@ rows. The schema is unchanged; delegates to 'D.drop'.
drop :: Int -> TypedDataFrame cols -> TypedDataFrame cols
drop n (TDF df) = TDF (D.drop n df)

-- | Drop the last @n@ rows. The schema is unchanged; delegates to 'D.dropLast'.
dropLast :: Int -> TypedDataFrame cols -> TypedDataFrame cols
dropLast n (TDF df) = TDF (D.dropLast n df)

{- | Take rows in the given range (start, end).

The pair is passed unchanged to 'D.range'; whether the end index is inclusive
is decided there. The schema is unchanged.
-}
range :: (Int, Int) -> TypedDataFrame cols -> TypedDataFrame cols
range r (TDF df) = TDF (D.range r df)

{- | Take a sub-cube of the DataFrame.

The pair is passed unchanged to 'D.cube'. The type-level schema is kept as
@cols@ regardless of the bounds given.
-}
cube :: (Int, Int) -> TypedDataFrame cols -> TypedDataFrame cols
cube c (TDF df) = TDF (D.cube c df)

-- | Remove duplicate rows. The schema is unchanged; delegates to 'DA.distinct'.
distinct :: TypedDataFrame cols -> TypedDataFrame cols
distinct (TDF df) = TDF (DA.distinct df)

{- | Randomly sample a fraction of rows.

The generator and the fraction are passed unchanged to 'D.sample'. The schema
is unchanged.
-}
sample ::
    (RandomGen g) => g -> Double -> TypedDataFrame cols -> TypedDataFrame cols
sample g frac (TDF df) = TDF (D.sample g frac df)

{- | Shuffle all rows randomly.

The generator is passed unchanged to 'D.shuffle'. The schema is unchanged.
-}
shuffle :: (RandomGen g) => g -> TypedDataFrame cols -> TypedDataFrame cols
shuffle g (TDF df) = TDF (D.shuffle g df)

-------------------------------------------------------------------------------
-- Schema-modifying operations
-------------------------------------------------------------------------------

{- | Derive a new column from a typed expression. The column name must NOT
already exist in the schema (enforced at compile time via 'AssertAbsent').
The expression is validated against the current schema.

The new column is added to the schema with 'Snoc', not at the head:

@
df' = derive \@\"total\" (col \@\"price\" * col \@\"qty\") df
-- df' :: TDF (Snoc originalCols (Column \"total\" Double))
@
-}
derive ::
    forall name a cols.
    ( KnownSymbol name
    , Columnable a
    , AssertAbsent name cols
    ) =>
    TExpr cols a ->
    TypedDataFrame cols ->
    TypedDataFrame (Snoc cols (T.Column name a))
derive (TExpr expr) (TDF df) = unsafeFreeze (D.derive colName expr df)
  where
    -- Runtime name of the new column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Replace missing values in the named column with a default.

The column must have type @Maybe a@ in the schema (@Maybe a ~ Lookup name cols@).
It is re-derived under the same name as 'DF.fromMaybe' of the default and the
existing column, and the result schema is @Impute name cols@.
-}
impute ::
    forall name a cols.
    ( KnownSymbol name
    , Columnable a
    , Maybe a ~ Lookup name cols
    ) =>
    a ->
    TypedDataFrame cols ->
    TypedDataFrame (Impute name cols)
impute value (TDF df) =
    unsafeFreeze
        (D.derive colName (DF.fromMaybe value (DF.col @(Maybe a) colName)) df)
  where
    -- Runtime name of the column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Select a subset of columns by name.

The names are given as a type-level list and must all be present in the schema
('AssertAllPresent'). The result schema is @SubsetSchema names cols@.
-}
select ::
    forall (names :: [Symbol]) cols.
    (AllKnownSymbol names, AssertAllPresent names cols) =>
    TypedDataFrame cols -> TypedDataFrame (SubsetSchema names cols)
select (TDF df) = unsafeFreeze (D.select (symbolVals @names) df)

{- | Exclude columns by name.

The names are given as a type-level list. Unlike 'select', there is no
constraint requiring the names to be present in the schema. The result schema
is @ExcludeSchema names cols@.
-}
exclude ::
    forall (names :: [Symbol]) cols.
    (AllKnownSymbol names) =>
    TypedDataFrame cols -> TypedDataFrame (ExcludeSchema names cols)
exclude (TDF df) = unsafeFreeze (D.exclude (symbolVals @names) df)

{- | Rename a column.

Both names are supplied as type-level symbols; the result schema is
@RenameInSchema old new cols@. No constraint here requires @old@ to be present
in the schema.
-}
rename ::
    forall old new cols.
    (KnownSymbol old, KnownSymbol new) =>
    TypedDataFrame cols -> TypedDataFrame (RenameInSchema old new cols)
rename (TDF df) = unsafeFreeze (D.rename oldName newName df)
  where
    -- Runtime names, reflected from the type-level symbols.
    oldName = T.pack (symbolVal (Proxy @old))
    newName = T.pack (symbolVal (Proxy @new))

{- | Rename multiple columns from a type-level list of pairs.

Each pair is @'(old, new)@. The renames are applied one at a time in list
order, each to the result of the previous one. The result schema is
@RenameManyInSchema pairs cols@.
-}
renameMany ::
    forall (pairs :: [(Symbol, Symbol)]) cols.
    (AllKnownPairs pairs) =>
    TypedDataFrame cols -> TypedDataFrame (RenameManyInSchema pairs cols)
renameMany (TDF df) = unsafeFreeze (foldRenames (pairVals @pairs) df)
  where
    -- Left-to-right application of 'D.rename' over the reflected pairs.
    foldRenames [] df' = df'
    foldRenames ((old, new) : rest) df' = foldRenames rest (D.rename old new df')

{- | Insert a new column from a Foldable container.

The column name must not already exist in the schema ('AssertAbsent'). The new
column is placed at the head of the result schema.

NOTE(review): 'derive' adds its column with @Snoc@ while this function conses
onto the head; confirm nothing relies on schema order matching the order of
the underlying frame.
-}
insert ::
    forall name a cols t.
    ( KnownSymbol name
    , Columnable a
    , Foldable t
    , AssertAbsent name cols
    ) =>
    t a -> TypedDataFrame cols -> TypedDataFrame (T.Column name a ': cols)
insert xs (TDF df) = unsafeFreeze (D.insert colName xs df)
  where
    -- Runtime name of the new column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Insert a raw 'Column' value.

The column name must not already exist in the schema ('AssertAbsent'). The
element type @a@ recorded in the result schema is chosen by the caller; this
function does not compare it with the contents of the raw column before
calling 'unsafeFreeze'.
-}
insertColumn ::
    forall name a cols.
    ( KnownSymbol name
    , Columnable a
    , AssertAbsent name cols
    ) =>
    C.Column -> TypedDataFrame cols -> TypedDataFrame (T.Column name a ': cols)
insertColumn col (TDF df) = unsafeFreeze (D.insertColumn colName col df)
  where
    -- Runtime name of the new column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Insert a boxed 'Vector'.

The column name must not already exist in the schema ('AssertAbsent'). The new
column is placed at the head of the result schema.
-}
insertVector ::
    forall name a cols.
    ( KnownSymbol name
    , Columnable a
    , AssertAbsent name cols
    ) =>
    V.Vector a -> TypedDataFrame cols -> TypedDataFrame (T.Column name a ': cols)
insertVector vec (TDF df) = unsafeFreeze (D.insertVector colName vec df)
  where
    -- Runtime name of the new column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Clone an existing column under a new name.

@old@ must be present in the schema and @new@ must be absent. The clone is
placed at the head of the result schema with the element type of @old@
(@Lookup old cols@).
-}
cloneColumn ::
    forall old new cols.
    ( KnownSymbol old
    , KnownSymbol new
    , AssertPresent old cols
    , AssertAbsent new cols
    ) =>
    TypedDataFrame cols -> TypedDataFrame (T.Column new (Lookup old cols) ': cols)
cloneColumn (TDF df) = unsafeFreeze (D.cloneColumn oldName newName df)
  where
    -- Runtime names, reflected from the type-level symbols.
    oldName = T.pack (symbolVal (Proxy @old))
    newName = T.pack (symbolVal (Proxy @new))

{- | Drop a column by name.

The column must be present in the schema ('AssertPresent'). Implemented as
'D.exclude' with a single-element list; the result schema is
@RemoveColumn name cols@.
-}
dropColumn ::
    forall name cols.
    ( KnownSymbol name
    , AssertPresent name cols
    ) =>
    TypedDataFrame cols -> TypedDataFrame (RemoveColumn name cols)
dropColumn (TDF df) = unsafeFreeze (D.exclude [colName] df)
  where
    -- Runtime name of the column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Replace an existing column with new values derived from a typed expression.
The column must already exist and the new type must match
(@a ~ Lookup name cols@), so the schema is unchanged.

Implemented by calling 'D.derive' with the existing column's name.
-}
replaceColumn ::
    forall name a cols.
    ( KnownSymbol name
    , Columnable a
    , a ~ Lookup name cols
    , AssertPresent name cols
    ) =>
    TExpr cols a -> TypedDataFrame cols -> TypedDataFrame cols
replaceColumn (TExpr expr) (TDF df) = unsafeFreeze (D.derive colName expr df)
  where
    -- Runtime name of the column, reflected from the type-level symbol.
    colName = T.pack (symbolVal (Proxy @name))

{- | Vertically merge two DataFrames with the same schema.

Uses the 'Semigroup' instance of the underlying frame (@a <> b@), with the
first argument on the left.
-}
append :: TypedDataFrame cols -> TypedDataFrame cols -> TypedDataFrame cols
append (TDF a) (TDF b) = TDF (a <> b)

-------------------------------------------------------------------------------
-- Metadata (pass-through)
-------------------------------------------------------------------------------

{- | Dimensions of the underlying frame, as reported by 'D.dimensions'.

NOTE(review): presumably @(rows, columns)@ — confirm against 'D.dimensions'.
-}
dimensions :: TypedDataFrame cols -> (Int, Int)
dimensions (TDF df) = D.dimensions df

-- | Number of rows in the underlying frame, as reported by 'D.nRows'.
nRows :: TypedDataFrame cols -> Int
nRows (TDF df) = D.nRows df

-- | Number of columns in the underlying frame, as reported by 'D.nColumns'.
nColumns :: TypedDataFrame cols -> Int
nColumns (TDF df) = D.nColumns df

{- | Column names of the underlying frame, as reported by 'D.columnNames'.

These are read from the runtime frame, not from the type-level schema.
-}
columnNames :: TypedDataFrame cols -> [T.Text]
columnNames (TDF df) = D.columnNames df

-------------------------------------------------------------------------------
-- Internal helpers
-------------------------------------------------------------------------------

{- | Helper class for extracting [(Text, Text)] from type-level pairs.

'pairVals' has no value-level argument, so the list of pairs is chosen with a
type application, e.g. @pairVals \@pairs@.
-}
class AllKnownPairs (pairs :: [(Symbol, Symbol)]) where
    -- | The pairs as runtime text, in the same order as the type-level list.
    pairVals :: [(T.Text, T.Text)]

-- Base case: the empty type-level list reflects to the empty list.
instance AllKnownPairs '[] where
    pairVals = []

-- Inductive case: reflect the head pair, then recurse on the tail.
instance
    (KnownSymbol a, KnownSymbol b, AllKnownPairs rest) =>
    AllKnownPairs ('(a, b) ': rest)
    where
    pairVals =
        ( T.pack (symbolVal (Proxy @a))
        , T.pack (symbolVal (Proxy @b))
        )
            : pairVals @rest