Safe Haskell | None |
---|---|
Language | Haskell2010 |
DataFrame.Operations.Subset
Synopsis
- take :: Int -> DataFrame -> DataFrame
- takeLast :: Int -> DataFrame -> DataFrame
- drop :: Int -> DataFrame -> DataFrame
- dropLast :: Int -> DataFrame -> DataFrame
- range :: (Int, Int) -> DataFrame -> DataFrame
- clip :: Int -> Int -> Int -> Int
- filter :: Columnable a => Text -> (a -> Bool) -> DataFrame -> DataFrame
- filterByVector :: forall a b v. (Vector v b, Columnable a, Columnable b) => Text -> v b -> (a -> Bool) -> DataFrame -> DataFrame
- indexes :: Vector v a => (a -> Bool) -> v a -> Vector Int
- filterBy :: Columnable a => (a -> Bool) -> Text -> DataFrame -> DataFrame
- filterWhere :: Expr Bool -> DataFrame -> DataFrame
- filterJust :: Text -> DataFrame -> DataFrame
- filterNothing :: Text -> DataFrame -> DataFrame
- filterAllJust :: DataFrame -> DataFrame
- cube :: (Int, Int) -> DataFrame -> DataFrame
- select :: [Text] -> DataFrame -> DataFrame
- data SelectionCriteria
- = ColumnProperty (Column -> Bool)
- | ColumnNameProperty (Text -> Bool)
- | ColumnTextRange (Text, Text)
- | ColumnIndexRange (Int, Int)
- | ColumnName Text
- byName :: Text -> SelectionCriteria
- byProperty :: (Column -> Bool) -> SelectionCriteria
- byNameProperty :: (Text -> Bool) -> SelectionCriteria
- byNameRange :: (Text, Text) -> SelectionCriteria
- byIndexRange :: (Int, Int) -> SelectionCriteria
- selectBy :: [SelectionCriteria] -> DataFrame -> DataFrame
- exclude :: [Text] -> DataFrame -> DataFrame
- sample :: RandomGen g => g -> Double -> DataFrame -> DataFrame
- randomSplit :: RandomGen g => g -> Double -> DataFrame -> (DataFrame, DataFrame)
- kFolds :: RandomGen g => g -> Int -> DataFrame -> [DataFrame]
- generateRandomVector :: RandomGen g => g -> Int -> Vector Double
Documentation
Arguments
:: Columnable a | |
=> Text | Column to filter by |
-> (a -> Bool) | Filter condition |
-> DataFrame | Dataframe to filter |
-> DataFrame |
O(n * k) Filter rows by a given condition.
filter "x" even df
filterByVector :: forall a b v. (Vector v b, Columnable a, Columnable b) => Text -> v b -> (a -> Bool) -> DataFrame -> DataFrame Source #
filterBy :: Columnable a => (a -> Bool) -> Text -> DataFrame -> DataFrame Source #
O(k) a version of filter where the predicate comes first.
filterBy even "x" df
filterWhere :: Expr Bool -> DataFrame -> DataFrame Source #
O(k) filters the dataframe with a boolean expression.
filterWhere (F.col @Int x + F.col y F.> 5) df
filterJust :: Text -> DataFrame -> DataFrame Source #
O(k) removes all rows with Nothing
in a given column from the dataframe.
filterJust "col" df
filterNothing :: Text -> DataFrame -> DataFrame Source #
O(k) returns all rows with Nothing
in a give column.
filterNothing "col" df
filterAllJust :: DataFrame -> DataFrame Source #
O(n * k) removes all rows with Nothing
from the dataframe.
filterAllJust df
cube :: (Int, Int) -> DataFrame -> DataFrame Source #
O(k) cuts the dataframe in a cube of size (a, b) where a is the length and b is the width.
cube (10, 5) df
select :: [Text] -> DataFrame -> DataFrame Source #
O(n) Selects a number of columns in a given dataframe.
select ["name", "age"] df
data SelectionCriteria Source #
Constructors
ColumnProperty (Column -> Bool) | |
ColumnNameProperty (Text -> Bool) | |
ColumnTextRange (Text, Text) | |
ColumnIndexRange (Int, Int) | |
ColumnName Text |
byName :: Text -> SelectionCriteria Source #
Criteria for selecting a column by name.
selectBy [byName "Age"] df
equivalent to:
select ["Age"] df
byProperty :: (Column -> Bool) -> SelectionCriteria Source #
Criteria for selecting columns whose property satisfies given predicate.
selectBy [byProperty isNumeric] df
byNameProperty :: (Text -> Bool) -> SelectionCriteria Source #
Criteria for selecting columns whose name satisfies given predicate.
selectBy [byNameProperty (T.isPrefixOf "weight")] df
byNameRange :: (Text, Text) -> SelectionCriteria Source #
Criteria for selecting columns whose names are in the given lexicographic range (inclusive).
selectBy [byNameRange ("a", "c")] df
byIndexRange :: (Int, Int) -> SelectionCriteria Source #
Criteria for selecting columns whose indices are in the given (inclusive) range.
selectBy [byIndexRange (0, 5)] df
selectBy :: [SelectionCriteria] -> DataFrame -> DataFrame Source #
O(n) select columns by column predicate name.
sample :: RandomGen g => g -> Double -> DataFrame -> DataFrame Source #
Sample a dataframe. The double parameter must be between 0 and 1 (inclusive).
Example
ghci> import System.Random ghci> D.sample (mkStdGen 137) 0.1 df
randomSplit :: RandomGen g => g -> Double -> DataFrame -> (DataFrame, DataFrame) Source #
Split a dataset into two. The first in the tuple gets a sample of p (0 <= p <= 1) and the second gets (1 - p). This is useful for creating test and train splits.
Example
ghci> import System.Random ghci> D.randomSplit (mkStdGen 137) 0.9 df