Safe Haskell | None |
---|---|
Language | Haskell2010 |
DataFrame.Operations.Core
Synopsis
- dimensions :: DataFrame -> (Int, Int)
- columnNames :: DataFrame -> [Text]
- insertVector :: Columnable a => Text -> Vector a -> DataFrame -> DataFrame
- insertVectorWithDefault :: Columnable a => a -> Text -> Vector a -> DataFrame -> DataFrame
- insertUnboxedVector :: (Columnable a, Unbox a) => Text -> Vector a -> DataFrame -> DataFrame
- insertColumn :: Text -> Column -> DataFrame -> DataFrame
- cloneColumn :: Text -> Text -> DataFrame -> DataFrame
- rename :: Text -> Text -> DataFrame -> DataFrame
- renameMany :: [(Text, Text)] -> DataFrame -> DataFrame
- renameSafe :: Text -> Text -> DataFrame -> Either DataFrameException DataFrame
- data ColumnInfo = ColumnInfo {
- nameOfColumn :: !Text
- nonNullValues :: !Int
- nullValues :: !Int
- partiallyParsedValues :: !Int
- uniqueValues :: !Int
- typeOfColumn :: !Text
- describeColumns :: DataFrame -> DataFrame
- nulls :: Column -> Int
- partiallyParsed :: Column -> Int
- fromNamedColumns :: [(Text, Column)] -> DataFrame
- fromUnnamedColumns :: [Column] -> DataFrame
- fromRows :: [Text] -> [[Any]] -> DataFrame
- valueCounts :: Columnable a => Text -> DataFrame -> [(a, Int)]
- fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame
Documentation
dimensions :: DataFrame -> (Int, Int) Source #
O(1) Get DataFrame dimensions i.e. (rows, columns)
Example
ghci> D.dimensions df (100, 3)
columnNames :: DataFrame -> [Text] Source #
O(k) Get column names of the DataFrame in order of insertion.
Example
ghci> D.columnNames df ["col_a", "col_b", "col_c"]
Arguments
:: Columnable a | |
=> Text | Column Name |
-> Vector a | Vector to add to column |
-> DataFrame | DataFrame to add column to |
-> DataFrame |
Adds a vector to the dataframe. If the vector has less elements than the dataframe and the dataframe is not empty
the vector is converted to type `Maybe a` filled with Nothing
to match the size of the dataframe. Similarly,
if the vector has more elements than what's currently in the dataframe, the other columns in the dataframe are
change to `Maybe Type` and filled with Nothing
.
Example
ghci> import qualified Data.Vector as V ghci> D.insertVector "numbers" (V.fromList [1..10]) D.empty --------------- index | numbers ------|-------- Int | Int ------|-------- 0 | 1 1 | 2 2 | 3 3 | 4 4 | 5 5 | 6 6 | 7 7 | 8 8 | 9 9 | 10
insertVectorWithDefault Source #
Arguments
:: Columnable a | |
=> a | Default Value |
-> Text | Column name |
-> Vector a | Data to add to column |
-> DataFrame | DataFrame to add the column to |
-> DataFrame |
O(k) Add a column to the dataframe providing a default. This constructs a new vector and also may convert it to an unboxed vector if necessary. Since columns are usually large the runtime is dominated by the length of the list, k.
Arguments
:: (Columnable a, Unbox a) | |
=> Text | Column Name |
-> Vector a | Unboxed vector to add to column |
-> DataFrame | DataFrame to add the column to |
-> DataFrame |
O(n) Adds an unboxed vector to the dataframe.
Same as insertVector but takes an unboxed vector. If you insert a vector of numbers through insertVector it will either way be converted into an unboxed vector so this function saves that extra work/conversion.
Arguments
:: Text | Column Name |
-> Column | Column to add |
-> DataFrame | DataFrame to add the column to |
-> DataFrame |
O(n) Add a column to the dataframe.
Example
ghci> D.insertColumn "numbers" (D.fromList [1..10]) D.empty --------------- index | numbers ------|-------- Int | Int ------|-------- 0 | 1 1 | 2 2 | 3 3 | 4 4 | 5 5 | 6 6 | 7 7 | 8 8 | 9 9 | 10
cloneColumn :: Text -> Text -> DataFrame -> DataFrame Source #
O(n) Clones a column and places it under a new name in the dataframe.
Example
ghci> import qualified Data.Vector as V ghci> df = insertVector "numbers" (V.fromList [1..10]) D.empty ghci> D.cloneColumn "numbers" "others" df ------------------------ index | numbers | others ------|---------|------- Int | Int | Int ------|---------|------- 0 | 1 | 1 1 | 2 | 2 2 | 3 | 3 3 | 4 | 4 4 | 5 | 5 5 | 6 | 6 6 | 7 | 7 7 | 8 | 8 8 | 9 | 9 9 | 10 | 10
rename :: Text -> Text -> DataFrame -> DataFrame Source #
O(n) Renames a single column.
Example
ghci> import qualified Data.Vector as V ghci> df = insertVector "numbers" (V.fromList [1..10]) D.empty ghci> D.rename "numbers" "others" df -------------- index | others ------|------- Int | Int ------|------- 0 | 1 1 | 2 2 | 3 3 | 4 4 | 5 5 | 6 6 | 7 7 | 8 8 | 9 9 | 10
renameMany :: [(Text, Text)] -> DataFrame -> DataFrame Source #
O(n) Renames many columns.
Example
ghci> import qualified Data.Vector as V ghci> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty) ghci> df ------------------------ index | numbers | others ------|---------|------- Int | Int | Int ------|---------|------- 0 | 1 | 11 1 | 2 | 12 2 | 3 | 13 3 | 4 | 14 4 | 5 | 15 5 | 6 | 16 6 | 7 | 17 7 | 8 | 18 8 | 9 | 19 9 | 10 | 20 ghci> D.renameMany [("numbers", "first_10"), ("others", "next_10")] df -------------------------- index | first_10 | next_10 ------|----------|-------- Int | Int | Int ------|----------|-------- 0 | 1 | 11 1 | 2 | 12 2 | 3 | 13 3 | 4 | 14 4 | 5 | 15 5 | 6 | 16 6 | 7 | 17 7 | 8 | 18 8 | 9 | 19 9 | 10 | 20
renameSafe :: Text -> Text -> DataFrame -> Either DataFrameException DataFrame Source #
data ColumnInfo Source #
Constructors
ColumnInfo | |
Fields
|
describeColumns :: DataFrame -> DataFrame Source #
O(n * k ^ 2) Returns the number of non-null columns in the dataframe and the type associated with each column.
Example
ghci> import qualified Data.Vector as V ghci> df = D.insertVector "others" (V.fromList [11..20]) (D.insertVector "numbers" (V.fromList [1..10]) D.empty) ghci> D.describeColumns df ----------------------------------------------------------------------------------------------------- index | Column Name | # Non-null Values | # Null Values | # Partially parsed | # Unique Values | Type ------|-------------|-------------------|---------------|--------------------|-----------------|----- Int | Text | Int | Int | Int | Int | Text ------|-------------|-------------------|---------------|--------------------|-----------------|----- 0 | others | 10 | 0 | 0 | 10 | Int 1 | numbers | 10 | 0 | 0 | 10 | Int
partiallyParsed :: Column -> Int Source #
fromNamedColumns :: [(Text, Column)] -> DataFrame Source #
Creates a dataframe from a list of tuples with name and column.
Example
ghci> df = D.fromNamedColumns [("numbers", D.fromList [1..10]), ("others", D.fromList [11..20])] ghci> df ------------------------ index | numbers | others ------|---------|------- Int | Int | Int ------|---------|------- 0 | 1 | 11 1 | 2 | 12 2 | 3 | 13 3 | 4 | 14 4 | 5 | 15 5 | 6 | 16 6 | 7 | 17 7 | 8 | 18 8 | 9 | 19 9 | 10 | 20
fromUnnamedColumns :: [Column] -> DataFrame Source #
Create a dataframe from a list of columns. The column names are "0", "1"... etc. Useful for quick exploration but you should probably always rename the columns after or drop the ones you don't want.
Example
ghci> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]] ghci> df ----------------- index | 0 | 1 ------|-----|---- Int | Int | Int ------|-----|---- 0 | 1 | 11 1 | 2 | 12 2 | 3 | 13 3 | 4 | 14 4 | 5 | 15 5 | 6 | 16 6 | 7 | 17 7 | 8 | 18 8 | 9 | 19 9 | 10 | 20
valueCounts :: Columnable a => Text -> DataFrame -> [(a, Int)] Source #
O (k * n) Counts the occurences of each value in a given column.
Example
ghci> df = D.fromUnnamedColumns [D.fromList [1..10], D.fromList [11..20]] ghci> D.valueCounts @Int "0" df [(1,1),(2,1),(3,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1)]
fold :: (a -> DataFrame -> DataFrame) -> [a] -> DataFrame -> DataFrame Source #
A left fold for dataframes that takes the dataframe as the last object. This makes it easier to chain operations.
Example
ghci> D.fold (const id) [1..5] df ----------------- index | 0 | 1 ------|-----|---- Int | Int | Int ------|-----|---- 0 | 1 | 11 1 | 2 | 12 2 | 3 | 13 3 | 4 | 14 4 | 5 | 15 5 | 6 | 16 6 | 7 | 17 7 | 8 | 18 8 | 9 | 19 9 | 10 | 20