dataframe-0.7.0.0: A fast, safe, and intuitive DataFrame library.
Safe Haskell: None
Language: Haskell2010

DataFrame.Typed.Operations

Synopsis

Schema-preserving operations

filterWhere :: forall (cols :: [Type]). TExpr cols Bool -> TypedDataFrame cols -> TypedDataFrame cols Source #

Filter rows where a boolean expression evaluates to True. The expression is validated against the schema at compile time.

filter :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> (a -> Bool) -> TypedDataFrame cols -> TypedDataFrame cols Source #

Filter rows by applying a predicate to a typed expression.

filterBy :: forall a (cols :: [Type]). Columnable a => (a -> Bool) -> TExpr cols a -> TypedDataFrame cols -> TypedDataFrame cols Source #

Filter rows by a predicate on a column expression (flipped argument order).

filterAllJust :: forall (cols :: [Type]). TypedDataFrame cols -> TypedDataFrame (StripAllMaybe cols) Source #

Keep only rows where every Maybe-typed column holds a Just value. Strips Maybe from all column types in the result schema.

df :: TDF '[Column "x" (Maybe Double), Column "y" Int]
filterAllJust df :: TDF '[Column "x" Double, Column "y" Int]

filterJust :: forall (name :: Symbol) (cols :: [Type]). (KnownSymbol name, AssertPresent name cols) => TypedDataFrame cols -> TypedDataFrame (StripMaybeAt name cols) Source #

Keep only rows where the named column has Just values. Strips Maybe from that column's type in the result schema.

filterJust @"x" df

filterNothing :: forall (name :: Symbol) (cols :: [Type]). (KnownSymbol name, AssertPresent name cols) => TypedDataFrame cols -> TypedDataFrame cols Source #

Keep only rows where the named column has Nothing. Schema is preserved (column types unchanged, just fewer rows).

sortBy :: forall (cols :: [Type]). [TSortOrder cols] -> TypedDataFrame cols -> TypedDataFrame cols Source #

Sort by the given typed sort orders. Sort orders reference columns that are validated against the schema.

take :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take the first n rows.

takeLast :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take the last n rows.

drop :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Drop the first n rows.

dropLast :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Drop the last n rows.

range :: forall (cols :: [Type]). (Int, Int) -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take rows in the given range (start, end).

cube :: forall (cols :: [Type]). (Int, Int) -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take a sub-cube of the DataFrame.

distinct :: forall (cols :: [Type]). TypedDataFrame cols -> TypedDataFrame cols Source #

Remove duplicate rows.

sample :: forall g (cols :: [Type]). RandomGen g => g -> Double -> TypedDataFrame cols -> TypedDataFrame cols Source #

Randomly sample a fraction of rows.

shuffle :: forall g (cols :: [Type]). RandomGen g => g -> TypedDataFrame cols -> TypedDataFrame cols Source #

Shuffle all rows randomly.

Schema-modifying operations

derive :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, AssertAbsent name cols) => TExpr cols a -> TypedDataFrame cols -> TypedDataFrame (Snoc cols (Column name a)) Source #

Derive a new column from a typed expression. The column name must NOT already exist in the schema (enforced at compile time via AssertAbsent). The expression is validated against the current schema.

df' = derive @"total" (col @"price" * col @"qty") df
-- df' :: TDF (Snoc originalCols (Column "total" Double))

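impute :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, Maybe a ~ Lookup name cols) => a -> TypedDataFrame cols -> TypedDataFrame (Impute name cols) Source #

Impute the named column using the supplied value (presumably by replacing its Nothing entries with that value). The column must have type Maybe a in the schema (enforced at compile time); the result schema is given by Impute name cols.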

select :: forall (names :: [Symbol]) (cols :: [Type]). (AllKnownSymbol names, AssertAllPresent names cols) => TypedDataFrame cols -> TypedDataFrame (SubsetSchema names cols) Source #

Select a subset of columns by name.

exclude :: forall (names :: [Symbol]) (cols :: [Type]). AllKnownSymbol names => TypedDataFrame cols -> TypedDataFrame (ExcludeSchema names cols) Source #

Exclude columns by name.

rename :: forall (old :: Symbol) (new :: Symbol) (cols :: [Type]). (KnownSymbol old, KnownSymbol new) => TypedDataFrame cols -> TypedDataFrame (RenameInSchema old new cols) Source #

Rename a column.

renameMany :: forall (pairs :: [(Symbol, Symbol)]) (cols :: [Type]). AllKnownPairs pairs => TypedDataFrame cols -> TypedDataFrame (RenameManyInSchema pairs cols) Source #

Rename multiple columns from a type-level list of pairs.

insert :: forall (name :: Symbol) a (cols :: [Type]) t. (KnownSymbol name, Columnable a, Foldable t, AssertAbsent name cols) => t a -> TypedDataFrame cols -> TypedDataFrame (Column name a ': cols) Source #

Insert a new column from a Foldable container.

insertColumn :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, AssertAbsent name cols) => Column -> TypedDataFrame cols -> TypedDataFrame (Column name a ': cols) Source #

Insert a raw Column value.

insertVector :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, AssertAbsent name cols) => Vector a -> TypedDataFrame cols -> TypedDataFrame (Column name a ': cols) Source #

Insert a boxed Vector.

cloneColumn :: forall (old :: Symbol) (new :: Symbol) (cols :: [Type]). (KnownSymbol old, KnownSymbol new, AssertPresent old cols, AssertAbsent new cols) => TypedDataFrame cols -> TypedDataFrame (Column new (Lookup old cols) ': cols) Source #

Clone an existing column under a new name.

dropColumn :: forall (name :: Symbol) (cols :: [Type]). (KnownSymbol name, AssertPresent name cols) => TypedDataFrame cols -> TypedDataFrame (RemoveColumn name cols) Source #

Drop a column by name.

replaceColumn :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, a ~ Lookup name cols, AssertPresent name cols) => TExpr cols a -> TypedDataFrame cols -> TypedDataFrame cols Source #

Replace an existing column with new values derived from a typed expression. The column must already exist and the new type must match.

Metadata

dimensions :: forall (cols :: [Type]). TypedDataFrame cols -> (Int, Int) Source #

nRows :: forall (cols :: [Type]). TypedDataFrame cols -> Int Source #

nColumns :: forall (cols :: [Type]). TypedDataFrame cols -> Int Source #

columnNames :: forall (cols :: [Type]). TypedDataFrame cols -> [Text] Source #

Vertical merge

append :: forall (cols :: [Type]). TypedDataFrame cols -> TypedDataFrame cols -> TypedDataFrame cols Source #

Vertically merge two DataFrames with the same schema.