dataframe-0.3.3.2: A fast, safe, and intuitive DataFrame library.
Safe HaskellNone
LanguageHaskell2010

DataFrame.Operations.Subset

Synopsis

Documentation

take :: Int -> DataFrame -> DataFrame Source #

O(k * n) Take the first n rows of a DataFrame.

takeLast :: Int -> DataFrame -> DataFrame Source #

O(k * n) Take the last n rows of a DataFrame.

drop :: Int -> DataFrame -> DataFrame Source #

O(k * n) Drop the first n rows of a DataFrame.

dropLast :: Int -> DataFrame -> DataFrame Source #

O(k * n) Drop the last n rows of a DataFrame.

range :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k * n) Take a range of rows of a DataFrame.

clip :: Int -> Int -> Int -> Int Source #

filter Source #

Arguments

:: Columnable a 
=> Text

Column to filter by

-> (a -> Bool)

Filter condition

-> DataFrame

Dataframe to filter

-> DataFrame 

O(n * k) Filter rows by a given condition.

filter "x" even df

filterByVector :: forall a b v. (Vector v b, Columnable a, Columnable b) => Text -> v b -> (a -> Bool) -> DataFrame -> DataFrame Source #

indexes :: Vector v a => (a -> Bool) -> v a -> Vector Int Source #

filterBy :: Columnable a => (a -> Bool) -> Text -> DataFrame -> DataFrame Source #

O(k) a version of filter where the predicate comes first.

filterBy even "x" df

filterWhere :: Expr Bool -> DataFrame -> DataFrame Source #

O(k) filters the dataframe with a boolean expression.

filterWhere (F.col @Int "x" + F.col "y" F.> 5) df

filterJust :: Text -> DataFrame -> DataFrame Source #

O(k) removes all rows with Nothing in a given column from the dataframe.

filterJust "col" df

filterNothing :: Text -> DataFrame -> DataFrame Source #

O(k) returns all rows with Nothing in a given column.

filterNothing "col" df

filterAllJust :: DataFrame -> DataFrame Source #

O(n * k) removes all rows with Nothing from the dataframe.

filterAllJust df

cube :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k) cuts the dataframe in a cube of size (a, b) where a is the length and b is the width.

cube (10, 5) df

select :: [Text] -> DataFrame -> DataFrame Source #

O(n) Selects a number of columns in a given dataframe.

select ["name", "age"] df

byName :: Text -> SelectionCriteria Source #

Criteria for selecting a column by name.

selectBy [byName "Age"] df

equivalent to:

select ["Age"] df

byProperty :: (Column -> Bool) -> SelectionCriteria Source #

Criteria for selecting columns whose property satisfies given predicate.

selectBy [byProperty isNumeric] df

byNameProperty :: (Text -> Bool) -> SelectionCriteria Source #

Criteria for selecting columns whose name satisfies given predicate.

selectBy [byNameProperty (T.isPrefixOf "weight")] df

byNameRange :: (Text, Text) -> SelectionCriteria Source #

Criteria for selecting columns whose names are in the given lexicographic range (inclusive).

selectBy [byNameRange ("a", "c")] df

byIndexRange :: (Int, Int) -> SelectionCriteria Source #

Criteria for selecting columns whose indices are in the given (inclusive) range.

selectBy [byIndexRange (0, 5)] df

selectBy :: [SelectionCriteria] -> DataFrame -> DataFrame Source #

O(n) select columns using the given selection criteria (see byName, byProperty, byNameProperty, byNameRange and byIndexRange).

exclude :: [Text] -> DataFrame -> DataFrame Source #

O(n) inverse of select

exclude ["Name"] df

sample :: RandomGen g => g -> Double -> DataFrame -> DataFrame Source #

Sample a dataframe. The double parameter must be between 0 and 1 (inclusive).

Example

Expand
ghci> import System.Random
ghci> D.sample (mkStdGen 137) 0.1 df

randomSplit :: RandomGen g => g -> Double -> DataFrame -> (DataFrame, DataFrame) Source #

Split a dataset into two. The first in the tuple gets a sample of p (0 <= p <= 1) and the second gets (1 - p). This is useful for creating test and train splits.

Example

Expand
ghci> import System.Random
ghci> D.randomSplit (mkStdGen 137) 0.9 df

kFolds :: RandomGen g => g -> Int -> DataFrame -> [DataFrame] Source #

Creates n folds of a dataframe.

Example

Expand
ghci> import System.Random
ghci> D.kFolds (mkStdGen 137) 5 df