| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
DataFrame.Operations.Core
Synopsis
- dimensions :: DataFrame -> (Int, Int)
- nRows :: DataFrame -> Int
- nColumns :: DataFrame -> Int
- columnNames :: DataFrame -> [Text]
- insertVector :: Columnable a => Text -> Vector a -> DataFrame -> DataFrame
- insert :: (Columnable a, Foldable t) => Text -> t a -> DataFrame -> DataFrame
- insertVectorWithDefault :: Columnable a => a -> Text -> Vector a -> DataFrame -> DataFrame
- insertWithDefault :: (Columnable a, Foldable t) => a -> Text -> t a -> DataFrame -> DataFrame
- insertUnboxedVector :: (Columnable a, Unbox a) => Text -> Vector a -> DataFrame -> DataFrame
- insertColumn :: Text -> Column -> DataFrame -> DataFrame
- cloneColumn :: Text -> Text -> DataFrame -> DataFrame
- rename :: Text -> Text -> DataFrame -> DataFrame
- renameMany :: [(Text, Text)] -> DataFrame -> DataFrame
- renameSafe :: Text -> Text -> DataFrame -> Either DataFrameException DataFrame
- data ColumnInfo = ColumnInfo {
- nameOfColumn :: !Text
- nonNullValues :: !Int
- nullValues :: !Int
- typeOfColumn :: !Text
- describeColumns :: DataFrame -> DataFrame
- nulls :: Column -> Int
- partiallyParsed :: Column -> Int
- fromNamedColumns :: [(Text, Column)] -> DataFrame
- fromUnnamedColumns :: [Column] -> DataFrame
- fromRows :: [Text] -> [[Any]] -> DataFrame
- valueCounts :: (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Int)]
- valueProportions :: (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Double)]
- fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame
- toFloatMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Float))
- toDoubleMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Double))
- toIntMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Int))
- columnAsVector :: Columnable a => Expr a -> DataFrame -> Either DataFrameException (Vector a)
- columnAsIntVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Int)
- columnAsDoubleVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Double)
- columnAsFloatVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Float)
- columnAsUnboxedVector :: (Columnable a, Unbox a) => Expr a -> DataFrame -> Either DataFrameException (Vector a)
- columnAsList :: Columnable a => Expr a -> DataFrame -> [a]
Documentation
dimensions :: DataFrame -> (Int, Int) Source #
O(1) Get DataFrame dimensions i.e. (rows, columns)
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.dimensions df
(100,3)
nRows :: DataFrame -> Int Source #
O(1) Get number of rows in a dataframe.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.nRows df
100
nColumns :: DataFrame -> Int Source #
O(1) Get number of columns in a dataframe.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.nColumns df
3
columnNames :: DataFrame -> [Text] Source #
O(k) Get column names of the DataFrame in order of insertion.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("a", D.fromList [1..100]), ("b", D.fromList [1..100]), ("c", D.fromList [1..100])]
>>> D.columnNames df
["a","b","c"]
insertVector Source #
Arguments
| :: Columnable a | |
| => Text | Column Name |
| -> Vector a | Vector to add to column |
| -> DataFrame | DataFrame to add column to |
| -> DataFrame |
Adds a vector to the dataframe. If the vector has fewer elements than the dataframe and the dataframe is not empty,
the vector is converted to type `Maybe a` and padded with Nothing to match the size of the dataframe. Similarly,
if the vector has more elements than what's currently in the dataframe, the other columns in the dataframe are
changed to `Maybe Type` and padded with Nothing.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> D.insertVector "numbers" (V.fromList [(1 :: Int)..10]) D.empty
--------
numbers
--------
Int
--------
1
2
3
4
5
6
7
8
9
10
insert Source #
Arguments
| :: (Columnable a, Foldable t) | |
| => Text | Column Name |
| -> t a | Sequence to add to dataframe |
| -> DataFrame | DataFrame to add column to |
| -> DataFrame |
Adds a foldable collection to the dataframe. If the collection has fewer elements than the
dataframe and the dataframe is not empty,
the collection is converted to type `Maybe a` and padded with Nothing to match the size of the dataframe. Similarly,
if the collection has more elements than what's currently in the dataframe, the other columns in the dataframe are
changed to `Maybe Type` and padded with Nothing.
Be careful not to insert infinite collections with this function as that will crash the program.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> D.insert "numbers" [(1 :: Int)..10] D.empty
--------
numbers
--------
Int
--------
1
2
3
4
5
6
7
8
9
10
insertVectorWithDefault Source #
Arguments
| :: Columnable a | |
| => a | Default Value |
| -> Text | Column name |
| -> Vector a | Data to add to column |
| -> DataFrame | DataFrame to add the column to |
| -> DataFrame |
Adds a vector to the dataframe and pads it with a default value if it has fewer elements than the number of rows.
Example
>>> :set -XOverloadedStrings
>>> import qualified Data.Vector as V
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("x", D.fromList [(1 :: Int)..10])]
>>> D.insertVectorWithDefault 0 "numbers" (V.fromList [(1 :: Int),2,3]) df
-------------
x | numbers
----|--------
Int | Int
----|--------
1 | 1
2 | 2
3 | 3
4 | 0
5 | 0
6 | 0
7 | 0
8 | 0
9 | 0
10 | 0
insertWithDefault Source #
Arguments
| :: (Columnable a, Foldable t) | |
| => a | Default Value |
| -> Text | Column name |
| -> t a | Data to add to column |
| -> DataFrame | DataFrame to add the column to |
| -> DataFrame |
Adds a foldable collection (e.g. a list) to the dataframe and pads it with a default value if it has fewer elements than the number of rows.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> df = D.fromNamedColumns [("x", D.fromList [(1 :: Int)..10])]
>>> D.insertWithDefault 0 "numbers" [(1 :: Int),2,3] df
-------------
x | numbers
----|--------
Int | Int
----|--------
1 | 1
2 | 2
3 | 3
4 | 0
5 | 0
6 | 0
7 | 0
8 | 0
9 | 0
10 | 0
insertUnboxedVector Source #
Arguments
| :: (Columnable a, Unbox a) | |
| => Text | Column Name |
| -> Vector a | Unboxed vector to add to column |
| -> DataFrame | DataFrame to add the column to |
| -> DataFrame |
O(n) Adds an unboxed vector to the dataframe.
Same as insertVector but takes an unboxed vector. A vector of numbers inserted through insertVector is converted into an unboxed vector anyway, so this function saves that extra conversion.
insertColumn Source #
Arguments
| :: Text | Column Name |
| -> Column | Column to add |
| -> DataFrame | DataFrame to add the column to |
| -> DataFrame |
O(n) Add a column to the dataframe.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> D.insertColumn "numbers" (D.fromList [(1 :: Int)..10]) D.empty
--------
numbers
--------
Int
--------
1
2
3
4
5
6
7
8
9
10
cloneColumn :: Text -> Text -> DataFrame -> DataFrame Source #
O(n) Clones a column and places it under a new name in the dataframe.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "numbers" (V.fromList [1..10]) D.empty
>>> D.cloneColumn "numbers" "others" df
-----------------
numbers | others
---------|-------
Int | Int
---------|-------
1 | 1
2 | 2
3 | 3
4 | 4
5 | 5
6 | 6
7 | 7
8 | 8
9 | 9
10 | 10
rename :: Text -> Text -> DataFrame -> DataFrame Source #
O(n) Renames a single column.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "numbers" (V.fromList [1..10]) D.empty
>>> D.rename "numbers" "others" df
-------
others
-------
Int
-------
1
2
3
4
5
6
7
8
9
10
renameMany :: [(Text, Text)] -> DataFrame -> DataFrame Source #
O(n) Renames many columns.
Example
>>> :set -XOverloadedStrings
>>> import qualified DataFrame as D
>>> import qualified Data.Vector as V
>>> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)
>>> df
-----------------
numbers | others
---------|-------
Int | Int
---------|-------
1 | 11
2 | 12
3 | 13
4 | 14
5 | 15
6 | 16
7 | 17
8 | 18
9 | 19
10 | 20
>>> D.renameMany [("numbers", "first_10"), ("others", "next_10")] df
-------------------
first_10 | next_10
----------|--------
Int | Int
----------|--------
1 | 11
2 | 12
3 | 13
4 | 14
5 | 15
6 | 16
7 | 17
8 | 18
9 | 19
10 | 20
renameSafe :: Text -> Text -> DataFrame -> Either DataFrameException DataFrame Source #
data ColumnInfo Source #
Constructors
| ColumnInfo | |
Fields
| |
describeColumns :: DataFrame -> DataFrame Source #
O(n * k ^ 2) Returns, for each column in the dataframe, the number of non-null values, the number of null values, and the column's type.
Example
>>> import qualified Data.Vector as V
>>> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty)
>>> D.describeColumns df
--------------------------------------------------------
Column Name | # Non-null Values | # Null Values | Type
-------------|-------------------|---------------|-----
Text | Int | Int | Text
-------------|-------------------|---------------|-----
others | 10 | 0 | Int
numbers | 10 | 0 | Int
partiallyParsed :: Column -> Int Source #
fromNamedColumns :: [(Text, Column)] -> DataFrame Source #
Creates a dataframe from a list of tuples with name and column.
Example
>>> df = D.fromNamedColumns [("numbers", D.fromList [1..10]), ("others", D.fromList [11..20])]
>>> df
-----------------
numbers | others
---------|-------
Int | Int
---------|-------
1 | 11
2 | 12
3 | 13
4 | 14
5 | 15
6 | 16
7 | 17
8 | 18
9 | 19
10 | 20
fromUnnamedColumns :: [Column] -> DataFrame Source #
Create a dataframe from a list of columns. The column names are "0", "1"... etc. Useful for quick exploration but you should probably always rename the columns after or drop the ones you don't want.
Example
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]
>>> df
-----------------
0 | 1
-----|----
Int | Int
-----|----
1 | 11
2 | 12
3 | 13
4 | 14
5 | 15
6 | 16
7 | 17
8 | 18
9 | 19
10 | 20
valueCounts :: (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Int)] Source #
O (k * n) Counts the occurrences of each value in a given column.
Example
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]
>>> D.valueCounts @Int "0" df
[(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1)]
valueProportions :: (Ord a, Columnable a) => Expr a -> DataFrame -> [(a, Double)] Source #
O (k * n) Shows the proportions of each value in a given column.
Example
>>> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]]
>>> D.valueProportions @Int "0" df
[(1,0.1),(2,0.1),(3,0.1),(4,0.1),(5,0.1),(6,0.1),(7,0.1),(8,0.1),(9,0.1),(10,0.1)]
fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame Source #
A left fold for dataframes that takes the dataframe as the last object. This makes it easier to chain operations.
Example
>>> df = D.fromNamedColumns [("x", D.fromList [1..100]), ("y", D.fromList [11..110])]
>>> D.fold D.dropLast [1..5] df
---------
x | y
----|----
Int | Int
----|----
1 | 11
2 | 12
3 | 13
4 | 14
5 | 15
6 | 16
7 | 17
8 | 18
9 | 19
10 | 20
11 | 21
12 | 22
13 | 23
14 | 24
15 | 25
16 | 26
17 | 27
18 | 28
19 | 29
20 | 30
Showing 20 rows out of 85
toFloatMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Float)) Source #
Returns a dataframe as a two dimensional vector of floats.
Converts all columns in the dataframe to float vectors and transposes them into a row-major matrix representation.
This is useful for handing data over into ML systems.
Returns Left with an error if any column cannot be converted to floats.
toDoubleMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Double)) Source #
Returns a dataframe as a two dimensional vector of doubles.
Converts all columns in the dataframe to double vectors and transposes them into a row-major matrix representation.
This is useful for handing data over into ML systems.
Returns Left with an error if any column cannot be converted to doubles.
toIntMatrix :: DataFrame -> Either DataFrameException (Vector (Vector Int)) Source #
Returns a dataframe as a two dimensional vector of ints.
Converts all columns in the dataframe to int vectors and transposes them into a row-major matrix representation.
This is useful for handing data over into ML systems.
Returns Left with an error if any column cannot be converted to ints.
columnAsVector :: Columnable a => Expr a -> DataFrame -> Either DataFrameException (Vector a) Source #
Get a specific column as a vector.
You must specify the type via type applications.
Examples
>>> columnAsVector (F.col @Int "age") df
Right [25, 30, 35, ...]
>>> columnAsVector (F.col @Text "name") df
Right ["Alice", "Bob", "Charlie", ...]
columnAsIntVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Int) Source #
Retrieves a column as an unboxed vector of Int values.
Returns Left with a DataFrameException if the column cannot be converted to ints.
This may occur if the column contains non-numeric data or values outside the Int range.
columnAsDoubleVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Double) Source #
Retrieves a column as an unboxed vector of Double values.
Returns Left with a DataFrameException if the column cannot be converted to doubles.
This may occur if the column contains non-numeric data.
columnAsFloatVector :: (Columnable a, Num a) => Expr a -> DataFrame -> Either DataFrameException (Vector Float) Source #
Retrieves a column as an unboxed vector of Float values.
Returns Left with a DataFrameException if the column cannot be converted to floats.
This may occur if the column contains non-numeric data.
columnAsUnboxedVector :: (Columnable a, Unbox a) => Expr a -> DataFrame -> Either DataFrameException (Vector a) Source #
columnAsList :: Columnable a => Expr a -> DataFrame -> [a] Source #
Get a specific column as a list.
You must specify the type via type applications.
Examples
>>> columnAsList @Int "age" df
[25, 30, 35, ...]
>>> columnAsList @Text "name" df
["Alice", "Bob", "Charlie", ...]
Throws
- error - if the column type doesn't match the requested type