dataframe-0.7.0.0: A fast, safe, and intuitive DataFrame library.
Copyright: (c) 2025
License: MIT
Maintainer: mschavinda@gmail.com
Stability: experimental
Safe Haskell: None
Language: Haskell2010

DataFrame.Typed

Description

A type-safe layer over the dataframe library.

This module provides TypedDataFrame, a phantom-typed wrapper around the untyped DataFrame that tracks column names and types at compile time. All operations delegate to the untyped core at runtime; the phantom type is updated at compile time to reflect schema changes.

Key difference from untyped API: TExpr

All expression-taking operations use TExpr (typed expressions) instead of raw Expr. Column references are validated at compile time:

{-# LANGUAGE DataKinds, TypeApplications, TypeOperators #-}
import Data.Text (Text)
import qualified DataFrame as D
import qualified DataFrame.Typed as T

type People = '[T.Column "name" Text, T.Column "age" Int]

main = do
    raw <- D.readCsv "people.csv"
    case T.freeze @People raw of
        Nothing -> putStrLn "Schema mismatch!"
        Just df -> do
            let adults = T.filterWhere (T.col @"age" T..>=. T.lit 18) df
            let names  = T.columnAsList @"name" adults  -- :: [Text]
            print names

Column references like T.col @"age" are checked at compile time — if the column doesn't exist or has the wrong type, you get a type error, not a runtime exception.

filterAllJust tracks Maybe-stripping

df :: TypedDataFrame '[Column "x" (Maybe Double), Column "y" Int]
T.filterAllJust df :: TypedDataFrame '[Column "x" Double, Column "y" Int]

Typed aggregation (Option B)

result = T.aggregate
    (T.agg @"total" (T.sum (T.col @"salary"))
   $ T.agg @"count" (T.count (T.col @"salary"))
   $ T.aggNil)
    (T.groupBy @'["dept"] employees)
Synopsis

Core types

data TypedDataFrame (cols :: [Type]) Source #

A phantom-typed wrapper over the untyped DataFrame.

The type parameter cols is a type-level list of Column name ty entries that tracks the schema at compile time. All operations delegate to the untyped core at runtime and update the phantom type at compile time.

Instances

Instances details
Show (TypedDataFrame cols) Source # 
Instance details

Defined in DataFrame.Typed.Types

Eq (TypedDataFrame cols) Source # 
Instance details

Defined in DataFrame.Typed.Types

Methods

(==) :: TypedDataFrame cols -> TypedDataFrame cols -> Bool #

(/=) :: TypedDataFrame cols -> TypedDataFrame cols -> Bool #

data Column (name :: Symbol) a Source #

A phantom type that pairs a type-level column name (Symbol) with its element type. Has no value-level constructors — used purely at the type level to describe schemas.

Instances

Instances details
(KnownSymbol name, Typeable a, Columnable a, KnownSchema rest) => KnownSchema (Column name a ': rest) Source # 
Instance details

Defined in DataFrame.Typed.Schema

data TypedGrouped (keys :: [Symbol]) (cols :: [Type]) Source #

A phantom-typed wrapper over GroupedDataFrame.

data These a b #

The These type represents values with two non-exclusive possibilities.

This can be useful to represent combinations of two values, where the combination is defined if either input is. Algebraically, the type These A B represents (A + B + AB), which doesn't factor easily into sums and products--a type like Either A (B, Maybe A) is unclear and awkward to use.

These has straightforward instances of Functor, Monad, &c., and behaves like a hybrid error/writer monad, as would be expected.

For zipping and unzipping of structures with These values, see Data.Align.

Constructors

This a 
That b 
These a b 

Instances

Instances details
FromJSON2 These

Since: aeson-1.5.1.0

Instance details

Defined in Data.Aeson.Types.FromJSON

Methods

liftParseJSON2 :: Maybe a -> (Value -> Parser a) -> (Value -> Parser [a]) -> Maybe b -> (Value -> Parser b) -> (Value -> Parser [b]) -> Value -> Parser (These a b) #

liftParseJSONList2 :: Maybe a -> (Value -> Parser a) -> (Value -> Parser [a]) -> Maybe b -> (Value -> Parser b) -> (Value -> Parser [b]) -> Value -> Parser [These a b] #

liftOmittedField2 :: Maybe a -> Maybe b -> Maybe (These a b) #

Assoc These

Since: these-0.8

Instance details

Defined in Data.These

Methods

assoc :: These (These a b) c -> These a (These b c) #

unassoc :: These a (These b c) -> These (These a b) c #

Swap These

Since: these-0.8

Instance details

Defined in Data.These

Methods

swap :: These a b -> These b a #

Bifoldable These 
Instance details

Defined in Data.These

Methods

bifold :: Monoid m => These m m -> m #

bifoldMap :: Monoid m => (a -> m) -> (b -> m) -> These a b -> m #

bifoldr :: (a -> c -> c) -> (b -> c -> c) -> c -> These a b -> c #

bifoldl :: (c -> a -> c) -> (c -> b -> c) -> c -> These a b -> c #

Bifoldable1 These

Since: these-1.2

Instance details

Defined in Data.These

Methods

bifold1 :: Semigroup m => These m m -> m #

bifoldMap1 :: Semigroup m => (a -> m) -> (b -> m) -> These a b -> m #

Bifunctor These 
Instance details

Defined in Data.These

Methods

bimap :: (a -> b) -> (c -> d) -> These a c -> These b d #

first :: (a -> b) -> These a c -> These b c #

second :: (b -> c) -> These a b -> These a c #

Bitraversable These 
Instance details

Defined in Data.These

Methods

bitraverse :: Applicative f => (a -> f c) -> (b -> f d) -> These a b -> f (These c d) #

Eq2 These

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftEq2 :: (a -> b -> Bool) -> (c -> d -> Bool) -> These a c -> These b d -> Bool #

Ord2 These

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftCompare2 :: (a -> b -> Ordering) -> (c -> d -> Ordering) -> These a c -> These b d -> Ordering #

Read2 These

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftReadsPrec2 :: (Int -> ReadS a) -> ReadS [a] -> (Int -> ReadS b) -> ReadS [b] -> Int -> ReadS (These a b) #

liftReadList2 :: (Int -> ReadS a) -> ReadS [a] -> (Int -> ReadS b) -> ReadS [b] -> ReadS [These a b] #

liftReadPrec2 :: ReadPrec a -> ReadPrec [a] -> ReadPrec b -> ReadPrec [b] -> ReadPrec (These a b) #

liftReadListPrec2 :: ReadPrec a -> ReadPrec [a] -> ReadPrec b -> ReadPrec [b] -> ReadPrec [These a b] #

Show2 These

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftShowsPrec2 :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> (Int -> b -> ShowS) -> ([b] -> ShowS) -> Int -> These a b -> ShowS #

liftShowList2 :: (Int -> a -> ShowS) -> ([a] -> ShowS) -> (Int -> b -> ShowS) -> ([b] -> ShowS) -> [These a b] -> ShowS #

NFData2 These

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftRnf2 :: (a -> ()) -> (b -> ()) -> These a b -> () #

Hashable2 These

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftHashWithSalt2 :: (Int -> a -> Int) -> (Int -> b -> Int) -> Int -> These a b -> Int #

Generic1 (These a :: Type -> Type) 
Instance details

Defined in Data.These

Associated Types

type Rep1 (These a :: Type -> Type) 
Instance details

Defined in Data.These

Methods

from1 :: These a a0 -> Rep1 (These a) a0 #

to1 :: Rep1 (These a) a0 -> These a a0 #

FromJSON a => FromJSON1 (These a)

Since: aeson-1.5.1.0

Instance details

Defined in Data.Aeson.Types.FromJSON

Methods

liftParseJSON :: Maybe a0 -> (Value -> Parser a0) -> (Value -> Parser [a0]) -> Value -> Parser (These a a0) #

liftParseJSONList :: Maybe a0 -> (Value -> Parser a0) -> (Value -> Parser [a0]) -> Value -> Parser [These a a0] #

liftOmittedField :: Maybe a0 -> Maybe (These a a0) #

Foldable (These a) 
Instance details

Defined in Data.These

Methods

fold :: Monoid m => These a m -> m #

foldMap :: Monoid m => (a0 -> m) -> These a a0 -> m #

foldMap' :: Monoid m => (a0 -> m) -> These a a0 -> m #

foldr :: (a0 -> b -> b) -> b -> These a a0 -> b #

foldr' :: (a0 -> b -> b) -> b -> These a a0 -> b #

foldl :: (b -> a0 -> b) -> b -> These a a0 -> b #

foldl' :: (b -> a0 -> b) -> b -> These a a0 -> b #

foldr1 :: (a0 -> a0 -> a0) -> These a a0 -> a0 #

foldl1 :: (a0 -> a0 -> a0) -> These a a0 -> a0 #

toList :: These a a0 -> [a0] #

null :: These a a0 -> Bool #

length :: These a a0 -> Int #

elem :: Eq a0 => a0 -> These a a0 -> Bool #

maximum :: Ord a0 => These a a0 -> a0 #

minimum :: Ord a0 => These a a0 -> a0 #

sum :: Num a0 => These a a0 -> a0 #

product :: Num a0 => These a a0 -> a0 #

Eq a => Eq1 (These a)

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftEq :: (a0 -> b -> Bool) -> These a a0 -> These a b -> Bool #

Ord a => Ord1 (These a)

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftCompare :: (a0 -> b -> Ordering) -> These a a0 -> These a b -> Ordering #

Read a => Read1 (These a)

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftReadsPrec :: (Int -> ReadS a0) -> ReadS [a0] -> Int -> ReadS (These a a0) #

liftReadList :: (Int -> ReadS a0) -> ReadS [a0] -> ReadS [These a a0] #

liftReadPrec :: ReadPrec a0 -> ReadPrec [a0] -> ReadPrec (These a a0) #

liftReadListPrec :: ReadPrec a0 -> ReadPrec [a0] -> ReadPrec [These a a0] #

Show a => Show1 (These a)

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftShowsPrec :: (Int -> a0 -> ShowS) -> ([a0] -> ShowS) -> Int -> These a a0 -> ShowS #

liftShowList :: (Int -> a0 -> ShowS) -> ([a0] -> ShowS) -> [These a a0] -> ShowS #

Traversable (These a) 
Instance details

Defined in Data.These

Methods

traverse :: Applicative f => (a0 -> f b) -> These a a0 -> f (These a b) #

sequenceA :: Applicative f => These a (f a0) -> f (These a a0) #

mapM :: Monad m => (a0 -> m b) -> These a a0 -> m (These a b) #

sequence :: Monad m => These a (m a0) -> m (These a a0) #

Semigroup a => Applicative (These a) 
Instance details

Defined in Data.These

Methods

pure :: a0 -> These a a0 #

(<*>) :: These a (a0 -> b) -> These a a0 -> These a b #

liftA2 :: (a0 -> b -> c) -> These a a0 -> These a b -> These a c #

(*>) :: These a a0 -> These a b -> These a b #

(<*) :: These a a0 -> These a b -> These a a0 #

Functor (These a) 
Instance details

Defined in Data.These

Methods

fmap :: (a0 -> b) -> These a a0 -> These a b #

(<$) :: a0 -> These a b -> These a a0 #

Semigroup a => Monad (These a) 
Instance details

Defined in Data.These

Methods

(>>=) :: These a a0 -> (a0 -> These a b) -> These a b #

(>>) :: These a a0 -> These a b -> These a b #

return :: a0 -> These a a0 #

NFData a => NFData1 (These a)

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftRnf :: (a0 -> ()) -> These a a0 -> () #

Hashable a => Hashable1 (These a)

Since: these-1.1.1

Instance details

Defined in Data.These

Methods

liftHashWithSalt :: (Int -> a0 -> Int) -> Int -> These a a0 -> Int #

(FromJSON a, FromJSON b) => FromJSON (These a b)

Since: aeson-1.5.1.0

Instance details

Defined in Data.Aeson.Types.FromJSON

(Data a, Data b) => Data (These a b) 
Instance details

Defined in Data.These

Methods

gfoldl :: (forall d b0. Data d => c (d -> b0) -> d -> c b0) -> (forall g. g -> c g) -> These a b -> c (These a b) #

gunfold :: (forall b0 r. Data b0 => c (b0 -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c (These a b) #

toConstr :: These a b -> Constr #

dataTypeOf :: These a b -> DataType #

dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c (These a b)) #

dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (These a b)) #

gmapT :: (forall b0. Data b0 => b0 -> b0) -> These a b -> These a b #

gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> These a b -> r #

gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> These a b -> r #

gmapQ :: (forall d. Data d => d -> u) -> These a b -> [u] #

gmapQi :: Int -> (forall d. Data d => d -> u) -> These a b -> u #

gmapM :: Monad m => (forall d. Data d => d -> m d) -> These a b -> m (These a b) #

gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> These a b -> m (These a b) #

gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> These a b -> m (These a b) #

(Semigroup a, Semigroup b) => Semigroup (These a b) 
Instance details

Defined in Data.These

Methods

(<>) :: These a b -> These a b -> These a b #

sconcat :: NonEmpty (These a b) -> These a b #

stimes :: Integral b0 => b0 -> These a b -> These a b #

Generic (These a b) 
Instance details

Defined in Data.These

Associated Types

type Rep (These a b) 
Instance details

Defined in Data.These

Methods

from :: These a b -> Rep (These a b) x #

to :: Rep (These a b) x -> These a b #

(Read a, Read b) => Read (These a b) 
Instance details

Defined in Data.These

(Show a, Show b) => Show (These a b) 
Instance details

Defined in Data.These

Methods

showsPrec :: Int -> These a b -> ShowS #

show :: These a b -> String #

showList :: [These a b] -> ShowS #

(Binary a, Binary b) => Binary (These a b)

Since: these-0.7.1

Instance details

Defined in Data.These

Methods

put :: These a b -> Put #

get :: Get (These a b) #

putList :: [These a b] -> Put #

(NFData a, NFData b) => NFData (These a b)

Since: these-0.7.1

Instance details

Defined in Data.These

Methods

rnf :: These a b -> () #

(Eq a, Eq b) => Eq (These a b) 
Instance details

Defined in Data.These

Methods

(==) :: These a b -> These a b -> Bool #

(/=) :: These a b -> These a b -> Bool #

(Ord a, Ord b) => Ord (These a b) 
Instance details

Defined in Data.These

Methods

compare :: These a b -> These a b -> Ordering #

(<) :: These a b -> These a b -> Bool #

(<=) :: These a b -> These a b -> Bool #

(>) :: These a b -> These a b -> Bool #

(>=) :: These a b -> These a b -> Bool #

max :: These a b -> These a b -> These a b #

min :: These a b -> These a b -> These a b #

(Hashable a, Hashable b) => Hashable (These a b) 
Instance details

Defined in Data.These

Methods

hashWithSalt :: Int -> These a b -> Int #

hash :: These a b -> Int #

type Rep1 (These a :: Type -> Type) 
Instance details

Defined in Data.These

type Rep (These a b) 
Instance details

Defined in Data.These

Typed expressions

newtype TExpr (cols :: [Type]) a Source #

A typed expression validated against schema cols, producing values of type a.

Unlike the untyped Expr a, a TExpr can only be constructed through type-safe combinators (col, lit, arithmetic operations) that verify column references exist in the schema with the correct type.

Use unTExpr to extract the underlying Expr for delegation to the untyped API.

Constructors

TExpr 

Fields

unTExpr :: Expr a

Instances

Instances details
(IsString a, Columnable a) => IsString (TExpr cols a) Source # 
Instance details

Defined in DataFrame.Typed.Expr

Methods

fromString :: String -> TExpr cols a #

(Floating a, Columnable a) => Floating (TExpr cols a) Source # 
Instance details

Defined in DataFrame.Typed.Expr

Methods

pi :: TExpr cols a #

exp :: TExpr cols a -> TExpr cols a #

log :: TExpr cols a -> TExpr cols a #

sqrt :: TExpr cols a -> TExpr cols a #

(**) :: TExpr cols a -> TExpr cols a -> TExpr cols a #

logBase :: TExpr cols a -> TExpr cols a -> TExpr cols a #

sin :: TExpr cols a -> TExpr cols a #

cos :: TExpr cols a -> TExpr cols a #

tan :: TExpr cols a -> TExpr cols a #

asin :: TExpr cols a -> TExpr cols a #

acos :: TExpr cols a -> TExpr cols a #

atan :: TExpr cols a -> TExpr cols a #

sinh :: TExpr cols a -> TExpr cols a #

cosh :: TExpr cols a -> TExpr cols a #

tanh :: TExpr cols a -> TExpr cols a #

asinh :: TExpr cols a -> TExpr cols a #

acosh :: TExpr cols a -> TExpr cols a #

atanh :: TExpr cols a -> TExpr cols a #

log1p :: TExpr cols a -> TExpr cols a #

expm1 :: TExpr cols a -> TExpr cols a #

log1pexp :: TExpr cols a -> TExpr cols a #

log1mexp :: TExpr cols a -> TExpr cols a #

(Num a, Columnable a) => Num (TExpr cols a) Source # 
Instance details

Defined in DataFrame.Typed.Expr

Methods

(+) :: TExpr cols a -> TExpr cols a -> TExpr cols a #

(-) :: TExpr cols a -> TExpr cols a -> TExpr cols a #

(*) :: TExpr cols a -> TExpr cols a -> TExpr cols a #

negate :: TExpr cols a -> TExpr cols a #

abs :: TExpr cols a -> TExpr cols a #

signum :: TExpr cols a -> TExpr cols a #

fromInteger :: Integer -> TExpr cols a #

(Fractional a, Columnable a) => Fractional (TExpr cols a) Source # 
Instance details

Defined in DataFrame.Typed.Expr

Methods

(/) :: TExpr cols a -> TExpr cols a -> TExpr cols a #

recip :: TExpr cols a -> TExpr cols a #

fromRational :: Rational -> TExpr cols a #

col :: forall (name :: Symbol) (cols :: [Type]) a. (KnownSymbol name, a ~ Lookup name cols, Columnable a, AssertPresent name cols) => TExpr cols a Source #

Create a typed column reference. This is the key type-safety entry point.

The column name must exist in cols and its type must match a. Both checks happen at compile time via type families.

salary :: TExpr '[Column "salary" Double] Double
salary = col @"salary"

lit :: forall a (cols :: [Type]). Columnable a => a -> TExpr cols a Source #

Create a literal expression. Valid for any schema since it references no columns.

ifThenElse :: forall a (cols :: [Type]). Columnable a => TExpr cols Bool -> TExpr cols a -> TExpr cols a -> TExpr cols a Source #

Conditional expression.

lift :: forall a b (cols :: [Type]). (Columnable a, Columnable b) => (a -> b) -> TExpr cols a -> TExpr cols b Source #

Lift a unary function into a typed expression.

lift2 :: forall a b c (cols :: [Type]). (Columnable a, Columnable b, Columnable c) => (a -> b -> c) -> TExpr cols a -> TExpr cols b -> TExpr cols c Source #

Lift a binary function into typed expressions.

Comparison operators

(.==.) :: forall a (cols :: [Type]). (Columnable a, Eq a) => TExpr cols a -> TExpr cols a -> TExpr cols Bool infixl 4 Source #

(./=.) :: forall a (cols :: [Type]). (Columnable a, Eq a) => TExpr cols a -> TExpr cols a -> TExpr cols Bool infixl 4 Source #

(.<.) :: forall a (cols :: [Type]). (Columnable a, Ord a) => TExpr cols a -> TExpr cols a -> TExpr cols Bool infixl 4 Source #

(.<=.) :: forall a (cols :: [Type]). (Columnable a, Ord a) => TExpr cols a -> TExpr cols a -> TExpr cols Bool infixl 4 Source #

(.>=.) :: forall a (cols :: [Type]). (Columnable a, Ord a) => TExpr cols a -> TExpr cols a -> TExpr cols Bool infixl 4 Source #

(.>.) :: forall a (cols :: [Type]). (Columnable a, Ord a) => TExpr cols a -> TExpr cols a -> TExpr cols Bool infixl 4 Source #

Logical operators

(.&&.) :: forall (cols :: [Type]). TExpr cols Bool -> TExpr cols Bool -> TExpr cols Bool infixr 3 Source #

(.||.) :: forall (cols :: [Type]). TExpr cols Bool -> TExpr cols Bool -> TExpr cols Bool infixr 2 Source #

not :: forall (cols :: [Type]). TExpr cols Bool -> TExpr cols Bool Source #

Aggregation expression combinators

sum :: forall a (cols :: [Type]). (Columnable a, Num a) => TExpr cols a -> TExpr cols a Source #

mean :: forall a (cols :: [Type]). (Columnable a, Real a) => TExpr cols a -> TExpr cols Double Source #

count :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> TExpr cols Int Source #

minimum :: forall a (cols :: [Type]). (Columnable a, Ord a) => TExpr cols a -> TExpr cols a Source #

maximum :: forall a (cols :: [Type]). (Columnable a, Ord a) => TExpr cols a -> TExpr cols a Source #

collect :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> TExpr cols [a] Source #

Typed sort orders

data TSortOrder (cols :: [Type]) where Source #

A typed sort order validated against schema cols.

Constructors

Asc :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> TSortOrder cols 
Desc :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> TSortOrder cols 

asc :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> TSortOrder cols Source #

Create an ascending sort order from a typed expression.

desc :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> TSortOrder cols Source #

Create a descending sort order from a typed expression.

Named expression helper

as :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> Text -> NamedExpr Source #

Create a NamedExpr for use with aggregateUntyped.

Freeze / thaw boundary

freeze :: forall (cols :: [Type]). KnownSchema cols => DataFrame -> Maybe (TypedDataFrame cols) Source #

Validate that an untyped DataFrame matches the expected schema cols, then wrap it. Returns Nothing on mismatch.

freezeWithError :: forall (cols :: [Type]). KnownSchema cols => DataFrame -> Either Text (TypedDataFrame cols) Source #

Like freeze but returns a descriptive error message on failure.

thaw :: forall (cols :: [Type]). TypedDataFrame cols -> DataFrame Source #

Unwrap a typed DataFrame back to the untyped representation. Always safe; discards type information.

unsafeFreeze :: forall (cols :: [Type]). DataFrame -> TypedDataFrame cols Source #

Wrap an untyped DataFrame without any validation. Used internally after delegation where the library guarantees schema correctness.

Typed column access

columnAsVector :: forall (name :: Symbol) (cols :: [Type]) a. (KnownSymbol name, a ~ Lookup name cols, Columnable a, AssertPresent name cols) => TypedDataFrame cols -> Vector a Source #

Retrieve a column as a boxed Vector, with the type determined by the schema. The column must exist (enforced at compile time).

columnAsList :: forall (name :: Symbol) (cols :: [Type]) a. (KnownSymbol name, a ~ Lookup name cols, Columnable a, AssertPresent name cols) => TypedDataFrame cols -> [a] Source #

Retrieve a column as a list, with the type determined by the schema.

Schema-preserving operations

filterWhere :: forall (cols :: [Type]). TExpr cols Bool -> TypedDataFrame cols -> TypedDataFrame cols Source #

Filter rows where a boolean expression evaluates to True. The expression is validated against the schema at compile time.

filter :: forall a (cols :: [Type]). Columnable a => TExpr cols a -> (a -> Bool) -> TypedDataFrame cols -> TypedDataFrame cols Source #

Filter rows by applying a predicate to a typed expression.

filterBy :: forall a (cols :: [Type]). Columnable a => (a -> Bool) -> TExpr cols a -> TypedDataFrame cols -> TypedDataFrame cols Source #

Filter rows by a predicate on a column expression (flipped argument order).

filterAllJust :: forall (cols :: [Type]). TypedDataFrame cols -> TypedDataFrame (StripAllMaybe cols) Source #

Keep only rows where ALL Optional columns have Just values. Strips Maybe from all column types in the result schema.

df :: TDF '[Column "x" (Maybe Double), Column "y" Int]
filterAllJust df :: TDF '[Column "x" Double, Column "y" Int]

filterJust :: forall (name :: Symbol) (cols :: [Type]). (KnownSymbol name, AssertPresent name cols) => TypedDataFrame cols -> TypedDataFrame (StripMaybeAt name cols) Source #

Keep only rows where the named column has Just values. Strips Maybe from that column's type in the result schema.

filterJust @"x" df

filterNothing :: forall (name :: Symbol) (cols :: [Type]). (KnownSymbol name, AssertPresent name cols) => TypedDataFrame cols -> TypedDataFrame cols Source #

Keep only rows where the named column has Nothing. Schema is preserved (column types unchanged, just fewer rows).

sortBy :: forall (cols :: [Type]). [TSortOrder cols] -> TypedDataFrame cols -> TypedDataFrame cols Source #

Sort by the given typed sort orders. Sort orders reference columns that are validated against the schema.

take :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take the first n rows.

takeLast :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take the last n rows.

drop :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Drop the first n rows.

dropLast :: forall (cols :: [Type]). Int -> TypedDataFrame cols -> TypedDataFrame cols Source #

Drop the last n rows.

range :: forall (cols :: [Type]). (Int, Int) -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take rows in the given range (start, end).

cube :: forall (cols :: [Type]). (Int, Int) -> TypedDataFrame cols -> TypedDataFrame cols Source #

Take a sub-cube of the DataFrame.

distinct :: forall (cols :: [Type]). TypedDataFrame cols -> TypedDataFrame cols Source #

Remove duplicate rows.

sample :: forall g (cols :: [Type]). RandomGen g => g -> Double -> TypedDataFrame cols -> TypedDataFrame cols Source #

Randomly sample a fraction of rows.

shuffle :: forall g (cols :: [Type]). RandomGen g => g -> TypedDataFrame cols -> TypedDataFrame cols Source #

Shuffle all rows randomly.

Schema-modifying operations

derive :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, AssertAbsent name cols) => TExpr cols a -> TypedDataFrame cols -> TypedDataFrame (Snoc cols (Column name a)) Source #

Derive a new column from a typed expression. The column name must NOT already exist in the schema (enforced at compile time via AssertAbsent). The expression is validated against the current schema.

df' = derive @"total" (col @"price" * col @"qty") df
-- df' :: TDF (Snoc originalCols (Column "total" Double))

impute :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, Maybe a ~ Lookup name cols) => a -> TypedDataFrame cols -> TypedDataFrame (Impute name cols) Source #

select :: forall (names :: [Symbol]) (cols :: [Type]). (AllKnownSymbol names, AssertAllPresent names cols) => TypedDataFrame cols -> TypedDataFrame (SubsetSchema names cols) Source #

Select a subset of columns by name.

exclude :: forall (names :: [Symbol]) (cols :: [Type]). AllKnownSymbol names => TypedDataFrame cols -> TypedDataFrame (ExcludeSchema names cols) Source #

Exclude columns by name.

rename :: forall (old :: Symbol) (new :: Symbol) (cols :: [Type]). (KnownSymbol old, KnownSymbol new) => TypedDataFrame cols -> TypedDataFrame (RenameInSchema old new cols) Source #

Rename a column.

renameMany :: forall (pairs :: [(Symbol, Symbol)]) (cols :: [Type]). AllKnownPairs pairs => TypedDataFrame cols -> TypedDataFrame (RenameManyInSchema pairs cols) Source #

Rename multiple columns from a type-level list of pairs.

insert :: forall (name :: Symbol) a (cols :: [Type]) t. (KnownSymbol name, Columnable a, Foldable t, AssertAbsent name cols) => t a -> TypedDataFrame cols -> TypedDataFrame (Column name a ': cols) Source #

Insert a new column from a Foldable container.

insertColumn :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, AssertAbsent name cols) => Column -> TypedDataFrame cols -> TypedDataFrame (Column name a ': cols) Source #

Insert a raw Column value.

insertVector :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, AssertAbsent name cols) => Vector a -> TypedDataFrame cols -> TypedDataFrame (Column name a ': cols) Source #

Insert a boxed Vector.

cloneColumn :: forall (old :: Symbol) (new :: Symbol) (cols :: [Type]). (KnownSymbol old, KnownSymbol new, AssertPresent old cols, AssertAbsent new cols) => TypedDataFrame cols -> TypedDataFrame (Column new (Lookup old cols) ': cols) Source #

Clone an existing column under a new name.

dropColumn :: forall (name :: Symbol) (cols :: [Type]). (KnownSymbol name, AssertPresent name cols) => TypedDataFrame cols -> TypedDataFrame (RemoveColumn name cols) Source #

Drop a column by name.

replaceColumn :: forall (name :: Symbol) a (cols :: [Type]). (KnownSymbol name, Columnable a, a ~ Lookup name cols, AssertPresent name cols) => TExpr cols a -> TypedDataFrame cols -> TypedDataFrame cols Source #

Replace an existing column with new values derived from a typed expression. The column must already exist and the new type must match.

Metadata

dimensions :: forall (cols :: [Type]). TypedDataFrame cols -> (Int, Int) Source #

nRows :: forall (cols :: [Type]). TypedDataFrame cols -> Int Source #

nColumns :: forall (cols :: [Type]). TypedDataFrame cols -> Int Source #

columnNames :: forall (cols :: [Type]). TypedDataFrame cols -> [Text] Source #

Vertical merge

append :: forall (cols :: [Type]). TypedDataFrame cols -> TypedDataFrame cols -> TypedDataFrame cols Source #

Vertically merge two DataFrames with the same schema.

Joins

innerJoin :: forall (keys :: [Symbol]) (left :: [Type]) (right :: [Type]). AllKnownSymbol keys => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (InnerJoinSchema keys left right) Source #

Typed inner join on one or more key columns.

leftJoin :: forall (keys :: [Symbol]) (left :: [Type]) (right :: [Type]). AllKnownSymbol keys => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (LeftJoinSchema keys left right) Source #

Typed left join.

rightJoin :: forall (keys :: [Symbol]) (left :: [Type]) (right :: [Type]). AllKnownSymbol keys => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (RightJoinSchema keys left right) Source #

Typed right join.

fullOuterJoin :: forall (keys :: [Symbol]) (left :: [Type]) (right :: [Type]). AllKnownSymbol keys => TypedDataFrame left -> TypedDataFrame right -> TypedDataFrame (FullOuterJoinSchema keys left right) Source #

Typed full outer join.

GroupBy and Aggregation (Option B)

groupBy :: forall (keys :: [Symbol]) (cols :: [Type]). (AllKnownSymbol keys, AssertAllPresent keys cols) => TypedDataFrame cols -> TypedGrouped keys cols Source #

Group a typed DataFrame by one or more key columns.

grouped = groupBy @'["department"] employees

agg :: forall (name :: Symbol) a (keys :: [Symbol]) (cols :: [Type]) (aggs :: [Type]). (KnownSymbol name, Columnable a) => TExpr cols a -> TAgg keys cols aggs -> TAgg keys cols (Column name a ': aggs) Source #

Add one aggregation to the builder.

Each call prepends a Column name a to the result schema and records the runtime NamedExpr. The expression is validated against the source schema cols at compile time.

agg @"total_sales" (sum (col @"salary"))
  $ agg @"avg_price" (mean (col @"price"))
  $ aggNil

aggNil :: forall (keys :: [Symbol]) (cols :: [Type]). TAgg keys cols ('[] :: [Type]) Source #

The empty aggregation — no output columns beyond the group keys.

aggregate :: forall (keys :: [Symbol]) (cols :: [Type]) (aggs :: [Type]). TAgg keys cols aggs -> TypedGrouped keys cols -> TypedDataFrame (Append (GroupKeyColumns keys cols) (Reverse aggs)) Source #

Run a typed aggregation.

Result schema = grouping key columns ++ aggregated columns (in declaration order).

result = aggregate
    (agg @"total" (sum (col @"salary")) $ agg @"count" (count (col @"salary")) $ aggNil)
    (groupBy @'["dept"] employees)
-- result :: TDF '[Column "dept" Text, Column "total" Double, Column "count" Int]

aggregateUntyped :: forall (keys :: [Symbol]) (cols :: [Type]). [NamedExpr] -> TypedGrouped keys cols -> DataFrame Source #

Escape hatch: run an untyped aggregation and return a raw DataFrame.

Template Haskell

Schema type families (for advanced use)

type family Lookup (name :: Symbol) (cols :: [Type]) where ... Source #

Look up the element type of a column by name.

Equations

Lookup name (Column name a ': _1) = a 
Lookup name (Column _1 _2 ': rest) = Lookup name rest 
Lookup name ('[] :: [Type]) = TypeError (('Text "Column '" ':<>: 'Text name) ':<>: 'Text "' not found in schema") :: Type 

type family HasName (name :: Symbol) (cols :: [Type]) :: Bool where ... Source #

Check whether a column name exists in a schema (type-level Bool).

Equations

HasName name (Column name _1 ': _2) = 'True 
HasName name (Column _1 _2 ': rest) = HasName name rest 
HasName name ('[] :: [Type]) = 'False 

type family SubsetSchema (names :: [Symbol]) (cols :: [Type]) :: [Type] where ... Source #

Select a subset of columns by a list of names.

Equations

SubsetSchema ('[] :: [Symbol]) cols = '[] :: [Type] 
SubsetSchema (n ': ns) cols = Column n (Lookup n cols) ': SubsetSchema ns cols 

type family ExcludeSchema (names :: [Symbol]) (cols :: [Type]) :: [Type] where ... Source #

Exclude columns by a list of names.

Equations

ExcludeSchema names ('[] :: [Type]) = '[] :: [Type] 
ExcludeSchema names (Column n a ': rest) = If (IsElem n names) (ExcludeSchema names rest) (Column n a ': ExcludeSchema names rest) 

type family RenameInSchema (old :: Symbol) (new :: Symbol) (cols :: [Type]) :: [Type] where ... Source #

Rename a column in the schema.

Equations

RenameInSchema old new (Column old a ': rest) = Column new a ': rest 
RenameInSchema old new (col ': rest) = col ': RenameInSchema old new rest 
RenameInSchema old new ('[] :: [Type]) = TypeError (('Text "Cannot rename: column '" ':<>: 'Text old) ':<>: 'Text "' not found") :: [Type] 

type family RemoveColumn (name :: Symbol) (cols :: [Type]) :: [Type] where ... Source #

Remove a column by name from a schema.

Equations

RemoveColumn name (Column name _1 ': rest) = rest 
RemoveColumn name (col ': rest) = col ': RemoveColumn name rest 
RemoveColumn name ('[] :: [Type]) = '[] :: [Type] 

type family Impute (name :: Symbol) (cols :: [Type]) :: [Type] where ... Source #

Unwrap the Maybe from the named column's type once its missing values have been imputed.

Equations

Impute name (Column name (Maybe a) ': rest) = Column name a ': rest 
Impute name (Column name _1 ': rest) = TypeError (('Text "Column '" ':<>: 'Text name) ':<>: 'Text "' is not of kind Maybe *") :: [Type] 
Impute name (col ': rest) = col ': Impute name rest 
Impute name ('[] :: [Type]) = '[] :: [Type] 

type family Append (xs :: [k]) (ys :: [k]) :: [k] where ... Source #

Append two type-level lists.

Equations

Append ('[] :: [k]) (ys :: [k]) = ys 
Append (x ': xs :: [k]) (ys :: [k]) = x ': Append xs ys 

type family Reverse (xs :: [Type]) :: [Type] where ... Source #

Reverse a type-level list.

Equations

Reverse xs = ReverseAcc xs ('[] :: [Type]) 

type family StripAllMaybe (cols :: [Type]) :: [Type] where ... Source #

Strip Maybe from all columns. Used by filterAllJust.

Column "x" (Maybe Double) becomes Column "x" Double. Column "y" Int stays Column "y" Int.

Equations

StripAllMaybe ('[] :: [Type]) = '[] :: [Type] 
StripAllMaybe (Column n (Maybe a) ': rest) = Column n a ': StripAllMaybe rest 
StripAllMaybe (Column n a ': rest) = Column n a ': StripAllMaybe rest 

type family StripMaybeAt (name :: Symbol) (cols :: [Type]) :: [Type] where ... Source #

Strip Maybe from a single named column. Used by filterJust.

StripMaybeAt "x" '[Column "x" (Maybe Double), Column "y" Int] = '[Column "x" Double, Column "y" Int]

Equations

StripMaybeAt name (Column name (Maybe a) ': rest) = Column name a ': rest 
StripMaybeAt name (Column name a ': rest) = Column name a ': rest 
StripMaybeAt name (col ': rest) = col ': StripMaybeAt name rest 
StripMaybeAt name ('[] :: [Type]) = TypeError (('Text "Column '" ':<>: 'Text name) ':<>: 'Text "' not found in schema") :: [Type] 

type family GroupKeyColumns (keys :: [Symbol]) (cols :: [Type]) :: [Type] where ... Source #

Extract Column entries from a schema whose names appear in keys.

Equations

GroupKeyColumns keys ('[] :: [Type]) = '[] :: [Type] 
GroupKeyColumns keys (Column n a ': rest) = If (IsElem n keys) (Column n a ': GroupKeyColumns keys rest) (GroupKeyColumns keys rest) 

type family InnerJoinSchema (keys :: [Symbol]) (left :: [Type]) (right :: [Type]) :: [Type] where ... Source #

Inner join result schema.

Equations

InnerJoinSchema keys left right = Append (SubsetSchema keys left) (Append (UniqueLeft left (Append keys (ColumnNames right))) (Append (UniqueLeft right (Append keys (ColumnNames left))) (CollidingColumns left right keys))) 

type family LeftJoinSchema (keys :: [Symbol]) (left :: [Type]) (right :: [Type]) :: [Type] where ... Source #

Left join result schema.

Equations

LeftJoinSchema keys left right = Append (SubsetSchema keys left) (Append (UniqueLeft left (Append keys (ColumnNames right))) (Append (WrapMaybe (UniqueLeft right (Append keys (ColumnNames left)))) (CollidingColumns left right keys))) 

type family RightJoinSchema (keys :: [Symbol]) (left :: [Type]) (right :: [Type]) :: [Type] where ... Source #

Right join result schema.

Equations

RightJoinSchema keys left right = Append (SubsetSchema keys right) (Append (WrapMaybe (UniqueLeft left (Append keys (ColumnNames right)))) (Append (UniqueLeft right (Append keys (ColumnNames left))) (CollidingColumns left right keys))) 

type family FullOuterJoinSchema (keys :: [Symbol]) (left :: [Type]) (right :: [Type]) :: [Type] where ... Source #

Full outer join result schema.

Equations

FullOuterJoinSchema keys left right = Append (WrapMaybe (SubsetSchema keys left)) (Append (WrapMaybe (UniqueLeft left (Append keys (ColumnNames right)))) (Append (WrapMaybe (UniqueLeft right (Append keys (ColumnNames left)))) (CollidingColumns left right keys))) 

type family AssertAbsent (name :: Symbol) (cols :: [Type]) where ... Source #

Assert that a column name is absent from the schema (for derive/insert).

Equations

AssertAbsent name cols = AssertAbsentHelper name (HasName name cols) cols 

type family AssertAllPresent (name :: [Symbol]) (cols :: [Type]) where ... Source #

Assert that every column name in the list is present in the schema.

Equations

AssertAllPresent (name ': rest) cols = If (HasName name cols) (AssertAllPresent rest cols) (TypeError (('Text "Column '" ':<>: 'Text name) ':<>: 'Text "' not found in schema") :: Constraint) 
AssertAllPresent ('[] :: [Symbol]) cols = () 

type family AssertPresent (name :: Symbol) (cols :: [Type]) where ... Source #

Assert that a column name is present in the schema.

Equations

AssertPresent name cols = AssertPresentHelper name (HasName name cols) cols 

Constraints

class KnownSchema (cols :: [Type]) where Source #

Provides runtime evidence of a schema: a list of (name, TypeRep) pairs.

Instances

Instances details
KnownSchema ('[] :: [Type]) Source # 
Instance details

Defined in DataFrame.Typed.Schema

(KnownSymbol name, Typeable a, Columnable a, KnownSchema rest) => KnownSchema (Column name a ': rest) Source # 
Instance details

Defined in DataFrame.Typed.Schema

class AllKnownSymbol (names :: [Symbol]) where Source #

A class that provides a list of Text values for a type-level list of Symbols.

Methods

symbolVals :: [Text] Source #

Instances

Instances details
AllKnownSymbol ('[] :: [Symbol]) Source # 
Instance details

Defined in DataFrame.Typed.Schema

Methods

symbolVals :: [Text] Source #

(KnownSymbol n, AllKnownSymbol ns) => AllKnownSymbol (n ': ns) Source # 
Instance details

Defined in DataFrame.Typed.Schema

Methods

symbolVals :: [Text] Source #