dataframe-0.7.0.0: A fast, safe, and intuitive DataFrame library.
Safe HaskellNone
LanguageHaskell2010

DataFrame.Operations.Core

Synopsis

Documentation

dimensions :: DataFrame -> (Int, Int) Source #

O(1) Get DataFrame dimensions i.e. (rows, columns)

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.dimensions df

(100,3)

nRows :: DataFrame -> Int Source #

O(1) Get number of rows in a dataframe.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.nRows df
100

nColumns :: DataFrame -> Int Source #

O(1) Get number of columns in a dataframe.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.nColumns df
3

columnNames :: DataFrame -> [Text] Source #

O(k) Get column names of the DataFrame in order of insertion.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.columnNames df

["a","b","c"]

insertVector Source #

Arguments

:: Columnable a 
=> Text

Column Name

-> Vector a

Vector to add to column

-> DataFrame

DataFrame to add column to

-> DataFrame 

Adds a vector to the dataframe. If the vector has fewer elements than the dataframe and the dataframe is not empty, the vector is converted to type `Maybe a` and padded with Nothing to match the size of the dataframe. Similarly, if the vector has more elements than the dataframe currently holds, the other columns in the dataframe are changed to `Maybe Type` and padded with Nothing.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> D.insertVector "numbers" (V.fromList [(1 :: Int)..10]) D.empty

--------
 numbers
--------
   Int
--------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

insert Source #

Arguments

:: (Columnable a, Foldable t) 
=> Text

Column Name

-> t a

Sequence to add to dataframe

-> DataFrame

DataFrame to add column to

-> DataFrame 

Adds a foldable collection to the dataframe. If the collection has fewer elements than the dataframe and the dataframe is not empty, the collection is converted to type `Maybe a` and padded with Nothing to match the size of the dataframe. Similarly, if the collection has more elements than the dataframe currently holds, the other columns in the dataframe are changed to `Maybe Type` and padded with Nothing.

Be careful not to insert infinite collections with this function as that will crash the program.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> D.insert "numbers" [(1 :: Int)..10] D.empty

--------
 numbers
--------
   Int
--------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

insertVectorWithDefault Source #

Arguments

:: Columnable a 
=> a

Default Value

-> Text

Column name

-> Vector a

Data to add to column

-> DataFrame

DataFrame to add the column to

-> DataFrame 

Adds a vector to the dataframe and pads it with a default value if it has fewer elements than the number of rows.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified Data.Vector as V
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("x", D.fromList [(1 :: Int)..10])]
>>> D.insertVectorWithDefault 0 "numbers" (V.fromList [(1 :: Int),2,3]) df

-------------
 x  | numbers
----|--------
Int |   Int
----|--------
1   | 1
2   | 2
3   | 3
4   | 0
5   | 0
6   | 0
7   | 0
8   | 0
9   | 0
10  | 0

insertWithDefault Source #

Arguments

:: (Columnable a, Foldable t) 
=> a

Default Value

-> Text

Column name

-> t a

Data to add to column

-> DataFrame

DataFrame to add the column to

-> DataFrame 

Adds a foldable collection to the dataframe and pads it with a default value if it has fewer elements than the number of rows.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("x", D.fromList [(1 :: Int)..10])]
>>> D.insertWithDefault 0 "numbers" [(1 :: Int),2,3] df

-------------
 x  | numbers
----|--------
Int |   Int
----|--------
1   | 1
2   | 2
3   | 3
4   | 0
5   | 0
6   | 0
7   | 0
8   | 0
9   | 0
10  | 0

insertUnboxedVector Source #

Arguments

:: (Columnable a, Unbox a) 
=> Text

Column Name

-> Vector a

Unboxed vector to add to column

-> DataFrame

DataFrame to add the column to

-> DataFrame 

O(n) Adds an unboxed vector to the dataframe.

Same as insertVector but takes an unboxed vector. A vector of numbers inserted through insertVector is converted into an unboxed vector anyway, so this function avoids that extra conversion.

insertColumn Source #

Arguments

:: Text

Column Name

-> Column

Column to add

-> DataFrame

DataFrame to add the column to

-> DataFrame 

O(n) Add a column to the dataframe.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> D.insertColumn "numbers" (D.fromList [(1 :: Int)..10]) D.empty

--------
 numbers
--------
   Int
--------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

cloneColumn :: Text -> Text -> DataFrame -> DataFrame Source #

O(n) Clones a column and places it under a new name in the dataframe.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "numbers" (V.fromList [1..10]) D.empty
>>> D.cloneColumn "numbers" "others" df

-----------------
 numbers | others
---------|-------
   Int   |  Int
---------|-------
 1       | 1
 2       | 2
 3       | 3
 4       | 4
 5       | 5
 6       | 6
 7       | 7
 8       | 8
 9       | 9
 10      | 10

rename :: Text -> Text -> DataFrame -> DataFrame Source #

O(n) Renames a single column.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "numbers" (V.fromList [1..10]) D.empty
>>> D.rename "numbers" "others" df

-------
 others
-------
  Int
-------
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10

renameMany :: [(Text, Text)] -> DataFrame -> DataFrame Source #

O(n) Renames many columns.

Example

Expand
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)
>>> df

-----------------
 numbers | others
---------|-------
   Int   |  Int
---------|-------
 1       | 11
 2       | 12
 3       | 13
 4       | 14
 5       | 15
 6       | 16
 7       | 17
 8       | 18
 9       | 19
 10      | 20

>>> D.renameMany [("numbers", "first_10"), ("others", "next_10")] df

-------------------
 first_10 | next_10
----------|--------
   Int    |   Int
----------|--------
 1        | 11
 2        | 12
 3        | 13
 4        | 14
 5        | 15
 6        | 16
 7        | 17
 8        | 18
 9        | 19
 10       | 20

describeColumns :: DataFrame -> DataFrame Source #

O(n * k ^ 2) Returns the number of non-null and null values in each column of the dataframe, along with the type associated with each column.

Example

Expand
>>> import qualified Data.Vector as V
>>> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)
>>> D.describeColumns df

--------------------------------------------------------
 Column Name | # Non-null Values | # Null Values | Type
-------------|-------------------|---------------|-----
    Text     |        Int        |      Int      | Text
-------------|-------------------|---------------|-----
 others      | 10                | 0             | Int
 numbers     | 10                | 0             | Int

fromNamedColumns :: [(Text, Column)] -> DataFrame Source #

Creates a dataframe from a list of tuples with name and column.

Example

Expand
>>> df = D.fromNamedColumns [("numbers", D.fromList [1..10]), ("others", D.fromList [11..20])]
>>> df
-----------------
 numbers | others
---------|-------
   Int   |  Int
---------|-------
 1       | 11
 2       | 12
 3       | 13
 4       | 14
 5       | 15
 6       | 16
 7       | 17
 8       | 18
 9       | 19
 10      | 20

fromUnnamedColumns :: [Column] -> DataFrame Source #

Create a dataframe from a list of columns. The column names are "0", "1", and so on. Useful for quick exploration, but you should usually rename the columns afterwards or drop the ones you don't want.

Example

Expand
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]
>>> df
----------
  0  |  1
-----|----
 Int | Int
-----|----
 1   | 11
 2   | 12
 3   | 13
 4   | 14
 5   | 15
 6   | 16
 7   | 17
 8   | 18
 9   | 19
 10  | 20

fromRows :: [Text] -> [[Any]] -> DataFrame Source #

Create a dataframe from a list of column names and rows.

Example

Expand
>>> df = D.fromRows ["A", "B"] [[D.toAny 1, D.toAny 11], [D.toAny 2, D.toAny 12], [D.toAny 3, D.toAny 13]]

>>> df

----------
  A  |  B
-----|----
 Int | Int
-----|----
 1   | 11
 2   | 12
 3   | 13

valueCounts :: (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Int)] Source #

O (k * n) Counts the occurrences of each value in a given column.

Example

Expand
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]

>>> D.valueCounts @Int "0" df

[(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1)]

valueProportions :: (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Double)] Source #

O (k * n) Shows the proportions of each value in a given column.

Example

Expand
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]

>>> D.valueProportions @Int "0" df

[(1,0.1),(2,0.1),(3,0.1),(4,0.1),(5,0.1),(6,0.1),(7,0.1),(8,0.1),(9,0.1),(10,0.1)]

fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame Source #

A left fold for dataframes that takes the dataframe as the last argument. This makes it easier to chain operations.

Example

Expand
>>> df = D.fromNamedColumns [("x", D.fromList [1..100]), ("y", D.fromList [11..110])]
>>> D.fold D.dropLast [1..5] df

---------
 x  |  y
----|----
Int | Int
----|----
1   | 11
2   | 12
3   | 13
4   | 14
5   | 15
6   | 16
7   | 17
8   | 18
9   | 19
10  | 20
11  | 21
12  | 22
13  | 23
14  | 24
15  | 25
16  | 26
17  | 27
18  | 28
19  | 29
20  | 30

Showing 20 rows out of 85

toFloatMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Float)) Source #

Returns a dataframe as a two dimensional vector of floats.

Converts all columns in the dataframe to float vectors and transposes them into a row-major matrix representation.

This is useful for handing data over into ML systems.

Returns Left with an error if any column cannot be converted to floats.

toDoubleMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Double)) Source #

Returns a dataframe as a two dimensional vector of doubles.

Converts all columns in the dataframe to double vectors and transposes them into a row-major matrix representation.

This is useful for handing data over into ML systems.

Returns Left with an error if any column cannot be converted to doubles.

toIntMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Int)) Source #

Returns a dataframe as a two dimensional vector of ints.

Converts all columns in the dataframe to int vectors and transposes them into a row-major matrix representation.

This is useful for handing data over into ML systems.

Returns Left with an error if any column cannot be converted to ints.

columnAsVector :: Columnable a => Expr a -> DataFrame -> Either DataFrameException (Vector a) Source #

Get a specific column as a vector.

You must specify the type via type applications.

Examples

Expand
>>> columnAsVector (F.col @Int "age") df
Right [25, 30, 35, ...]
>>> columnAsVector (F.col @Text "name") df
Right ["Alice", "Bob", "Charlie", ...]

columnAsIntVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Int) Source #

Retrieves a column as an unboxed vector of Int values.

Returns Left with a DataFrameException if the column cannot be converted to ints. This may occur if the column contains non-numeric data or values outside the Int range.

columnAsDoubleVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Double) Source #

Retrieves a column as an unboxed vector of Double values.

Returns Left with a DataFrameException if the column cannot be converted to doubles. This may occur if the column contains non-numeric data.

columnAsFloatVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Float) Source #

Retrieves a column as an unboxed vector of Float values.

Returns Left with a DataFrameException if the column cannot be converted to floats. This may occur if the column contains non-numeric data.

columnAsList :: Columnable a => Expr a -> DataFrame -> [a] Source #

Get a specific column as a list.

You must specify the type via type applications.

Examples

Expand
>>> columnAsList @Int "age" df
[25, 30, 35, ...]
>>> columnAsList @Text "name" df
["Alice", "Bob", "Charlie", ...]

Throws

Expand
  • error - if the column type doesn't match the requested type