| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
DataFrame.Lazy.Internal.DataFrame
Synopsis
- data LazyDataFrame = LazyDataFrame { plan :: LogicalPlan, batchSize :: Int }
- runDataFrame :: LazyDataFrame -> IO DataFrame
- fromDataFrame :: DataFrame -> LazyDataFrame
- scanCsv :: Schema -> Text -> LazyDataFrame
- scanSeparated :: Char -> Schema -> Text -> LazyDataFrame
- scanParquet :: Schema -> Text -> LazyDataFrame
- derive :: Columnable a => Text -> Expr a -> LazyDataFrame -> LazyDataFrame
- select :: [Text] -> LazyDataFrame -> LazyDataFrame
- filter :: Expr Bool -> LazyDataFrame -> LazyDataFrame
- join :: JoinType -> Text -> Text -> LazyDataFrame -> LazyDataFrame -> LazyDataFrame
- groupBy :: [Text] -> [(Text, UExpr)] -> LazyDataFrame -> LazyDataFrame
- sortBy :: [(Text, SortOrder)] -> LazyDataFrame -> LazyDataFrame
- limit :: Int -> LazyDataFrame -> LazyDataFrame
Documentation
data LazyDataFrame Source #
A lazy query that has not been executed yet.
The query is represented as a LogicalPlan tree; execution is deferred
until runDataFrame is called.
Constructors
| LazyDataFrame | |
Fields
| plan :: LogicalPlan | |
| batchSize :: Int | |
Instances
| Show LazyDataFrame Source # | |
Defined in DataFrame.Lazy.Internal.DataFrame. Methods: showsPrec :: Int -> LazyDataFrame -> ShowS #; show :: LazyDataFrame -> String #; showList :: [LazyDataFrame] -> ShowS # | |
runDataFrame :: LazyDataFrame -> IO DataFrame Source #
Execute the lazy query: optimise the logical plan, then stream-execute
the resulting physical plan, returning a fully-materialised DataFrame.
fromDataFrame :: DataFrame -> LazyDataFrame Source #
Lift an already-loaded eager DataFrame into the lazy plan.
scanCsv :: Schema -> Text -> LazyDataFrame Source #
Scan a CSV file with the default comma separator.
scanSeparated :: Char -> Schema -> Text -> LazyDataFrame Source #
Scan a character-separated file.
scanParquet :: Schema -> Text -> LazyDataFrame Source #
Scan a Parquet file, directory of files, or glob pattern.
derive :: Columnable a => Text -> Expr a -> LazyDataFrame -> LazyDataFrame Source #
Add a computed column (or overwrite an existing one).
select :: [Text] -> LazyDataFrame -> LazyDataFrame Source #
Retain only the listed columns.
filter :: Expr Bool -> LazyDataFrame -> LazyDataFrame Source #
Keep rows that satisfy the predicate.
join Source #
Arguments
| :: JoinType | |
| -> Text | Left join key column name |
| -> Text | Right join key column name |
| -> LazyDataFrame | Left sub-query |
| -> LazyDataFrame | Right sub-query |
| -> LazyDataFrame | |
Join two lazy queries on the given key columns.
groupBy Source #
Arguments
| :: [Text] | Group-by key columns |
| -> [(Text, UExpr)] | [(outputName, aggregateExpr)] |
| -> LazyDataFrame | |
| -> LazyDataFrame | |
Group by a set of columns and compute aggregate expressions.
Each aggregate expression should use an Agg node (e.g. sumOf, meanOf).
sortBy :: [(Text, SortOrder)] -> LazyDataFrame -> LazyDataFrame Source #
Sort the result by the given (column, direction) pairs.
limit :: Int -> LazyDataFrame -> LazyDataFrame Source #
Retain at most n rows.