Safe Haskell	None
Language	Haskell2010

DataFrame.Operations.Subset

Synopsis

take :: Int -> DataFrame -> DataFrame
takeLast :: Int -> DataFrame -> DataFrame
drop :: Int -> DataFrame -> DataFrame
dropLast :: Int -> DataFrame -> DataFrame
range :: (Int, Int) -> DataFrame -> DataFrame
clip :: Int -> Int -> Int -> Int
filter :: Columnable a => Text -> (a -> Bool) -> DataFrame -> DataFrame
filterByVector :: forall a b v. (Vector v b, Columnable a, Columnable b) => Text -> v b -> (a -> Bool) -> DataFrame -> DataFrame
indexes :: Vector v a => (a -> Bool) -> v a -> Vector Int
filterBy :: Columnable a => (a -> Bool) -> Text -> DataFrame -> DataFrame
filterWhere :: Expr Bool -> DataFrame -> DataFrame
filterJust :: Text -> DataFrame -> DataFrame
filterNothing :: Text -> DataFrame -> DataFrame
filterAllJust :: DataFrame -> DataFrame
cube :: (Int, Int) -> DataFrame -> DataFrame
select :: [Text] -> DataFrame -> DataFrame
data SelectionCriteria
- = ColumnProperty (Column -> Bool)
- | ColumnNameProperty (Text -> Bool)
- | ColumnTextRange (Text, Text)
- | ColumnIndexRange (Int, Int)
- | ColumnName Text
byName :: Text -> SelectionCriteria
byProperty :: (Column -> Bool) -> SelectionCriteria
byNameProperty :: (Text -> Bool) -> SelectionCriteria
byNameRange :: (Text, Text) -> SelectionCriteria
byIndexRange :: (Int, Int) -> SelectionCriteria
selectBy :: [SelectionCriteria] -> DataFrame -> DataFrame
exclude :: [Text] -> DataFrame -> DataFrame
sample :: RandomGen g => g -> Double -> DataFrame -> DataFrame
randomSplit :: RandomGen g => g -> Double -> DataFrame -> (DataFrame, DataFrame)
kFolds :: RandomGen g => g -> Int -> DataFrame -> [DataFrame]
generateRandomVector :: RandomGen g => g -> Int -> Vector Double

Documentation

take :: Int -> DataFrame -> DataFrame Source #

O(k * n) Take the first n rows of a DataFrame.

takeLast :: Int -> DataFrame -> DataFrame Source #

O(k * n) Take the last n rows of a DataFrame.

drop :: Int -> DataFrame -> DataFrame Source #

O(k * n) Drop the first n rows of a DataFrame.

dropLast :: Int -> DataFrame -> DataFrame Source #

O(k * n) Drop the last n rows of a DataFrame.

range :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k * n) Take a range of rows of a DataFrame.

clip :: Int -> Int -> Int -> Int Source #

filter Source #

Arguments

:: Columnable a
=> Text	Column to filter by
-> (a -> Bool)	Filter condition
-> DataFrame	Dataframe to filter
-> DataFrame

O(n * k) Filter rows by a given condition.

filter "x" even df

filterByVector :: forall a b v. (Vector v b, Columnable a, Columnable b) => Text -> v b -> (a -> Bool) -> DataFrame -> DataFrame Source #

indexes :: Vector v a => (a -> Bool) -> v a -> Vector Int Source #

filterBy :: Columnable a => (a -> Bool) -> Text -> DataFrame -> DataFrame Source #

O(k) a version of filter where the predicate comes first.

filterBy even "x" df

filterWhere :: Expr Bool -> DataFrame -> DataFrame Source #

O(k) filters the dataframe with a boolean expression.

filterWhere (F.col @Int x + F.col y F.> 5) df

filterJust :: Text -> DataFrame -> DataFrame Source #

O(k) removes all rows with Nothing in a given column from the dataframe.

filterJust "col" df

filterNothing :: Text -> DataFrame -> DataFrame Source #

O(k) returns all rows with Nothing in a give column.

filterNothing "col" df

filterAllJust :: DataFrame -> DataFrame Source #

O(n * k) removes all rows with Nothing from the dataframe.

filterAllJust df

cube :: (Int, Int) -> DataFrame -> DataFrame Source #

O(k) cuts the dataframe in a cube of size (a, b) where a is the length and b is the width.

cube (10, 5) df

select :: [Text] -> DataFrame -> DataFrame Source #

O(n) Selects a number of columns in a given dataframe.

select ["name", "age"] df

data SelectionCriteria Source #

Constructors

ColumnProperty (Column -> Bool)
ColumnNameProperty (Text -> Bool)
ColumnTextRange (Text, Text)
ColumnIndexRange (Int, Int)
ColumnName Text

byName :: Text -> SelectionCriteria Source #

Criteria for selecting a column by name.

selectBy [byName "Age"] df

equivalent to:

select ["Age"] df

byProperty :: (Column -> Bool) -> SelectionCriteria Source #

Criteria for selecting columns whose property satisfies given predicate.

selectBy [byProperty isNumeric] df

byNameProperty :: (Text -> Bool) -> SelectionCriteria Source #

Criteria for selecting columns whose name satisfies given predicate.

selectBy [byNameProperty (T.isPrefixOf "weight")] df

byNameRange :: (Text, Text) -> SelectionCriteria Source #

Criteria for selecting columns whose names are in the given lexicographic range (inclusive).

selectBy [byNameRange ("a", "c")] df

byIndexRange :: (Int, Int) -> SelectionCriteria Source #

Criteria for selecting columns whose indices are in the given (inclusive) range.

selectBy [byIndexRange (0, 5)] df

selectBy :: [SelectionCriteria] -> DataFrame -> DataFrame Source #

O(n) select columns by column predicate name.

exclude :: [Text] -> DataFrame -> DataFrame Source #

O(n) inverse of select

exclude ["Name"] df

sample :: RandomGen g => g -> Double -> DataFrame -> DataFrame Source #

Sample a dataframe. The double parameter must be between 0 and 1 (inclusive).

Example

Expand

ghci> import System.Random
ghci> D.sample (mkStdGen 137) 0.1 df

randomSplit :: RandomGen g => g -> Double -> DataFrame -> (DataFrame, DataFrame) Source #

Split a dataset into two. The first in the tuple gets a sample of p (0 <= p <= 1) and the second gets (1 - p). This is useful for creating test and train splits.

Example

Expand

ghci> import System.Random
ghci> D.randomSplit (mkStdGen 137) 0.9 df

kFolds :: RandomGen g => g -> Int -> DataFrame -> [DataFrame] Source #

Creates n folds of a dataframe.

Example

Expand

ghci> import System.Random
ghci> D.kFolds (mkStdGen 137) 5 df

generateRandomVector :: RandomGen g => g -> Int -> Vector Double Source #