{- |
Module      : DataFrame
Copyright   : (c) 2025
License     : GPL-3.0
Maintainer  : mschavinda@gmail.com
Stability   : experimental
Portability : POSIX

Batteries-included entry point for the DataFrame library.

This module re-exports the most commonly used pieces of the @dataframe@ library so you
can get productive fast in GHCi, IHaskell, or scripts.

__Naming convention__

* Use the @D.@ (\"DataFrame\") prefix for core table operations.
* Use the @F.@ (\"Functions\") prefix for the expression DSL (columns, math, aggregations).

Example session:

We provide a script that imports the core functionality and defines helpful
macros for writing safe code.

@
\$ curl --output dataframe \"https:\/\/raw.githubusercontent.com\/mchav\/dataframe\/refs\/heads\/main\/scripts\/dataframe.sh\"
\$ chmod +x dataframe
\$ export PATH=$PATH:$PWD/dataframe
\$ dataframe
Configuring library for fake-package-0...
Warning: No exposed modules
GHCi, version 9.6.7: https:\/\/www.haskell.org\/ghc\/  :? for help
Loaded GHCi configuration from \/tmp\/cabal-repl.-242816\/setcwd.ghci
========================================
              📦Dataframe
========================================

✨  Modules were automatically imported.

💡  Use prefix 'D' for core functionality.
        ● E.g. D.readCsv \"\/path\/to\/file\"
💡  Use prefix 'F' for expression functions.
        ● E.g. F.sum (F.col \@Int \"value\")

✅ Ready.
Loaded GHCi configuration from ./dataframe.ghci
ghci>
@

= Quick start
Load a CSV, select a few columns, filter, derive a column, then group + aggregate:

@
-- 1) Load data
ghci> df0 <- D.readCsv "data/housing.csv"
ghci> D.describeColumns df0
--------------------------------------------------------------------------------------------------------------------
index |    Column Name     | # Non-null Values | # Null Values | # Partially parsed | # Unique Values |     Type
------|--------------------|-------------------|---------------|--------------------|-----------------|-------------
 Int  |        Text        |        Int        |      Int      |        Int         |       Int       |     Text
------|--------------------|-------------------|---------------|--------------------|-----------------|-------------
0     | ocean_proximity    | 20640             | 0             | 0                  | 5               | Text
1     | median_house_value | 20640             | 0             | 0                  | 3842            | Double
2     | median_income      | 20640             | 0             | 0                  | 12928           | Double
3     | households         | 20640             | 0             | 0                  | 1815            | Double
4     | population         | 20640             | 0             | 0                  | 3888            | Double
5     | total_bedrooms     | 20640             | 0             | 0                  | 1924            | Maybe Double
6     | total_rooms        | 20640             | 0             | 0                  | 5926            | Double
7     | housing_median_age | 20640             | 0             | 0                  | 52              | Double
8     | latitude           | 20640             | 0             | 0                  | 862             | Double
9     | longitude          | 20640             | 0             | 0                  | 844             | Double

-- 2) Project & filter
ghci> :exposeColumn df
ghci> df1 = D.filterWhere (ocean_proximity F.== F.lit \"ISLAND\") df0 D.|> D.select [F.name median_house_value, F.name median_income, F.name ocean_proximity]

-- 3) Add a derived column using the expression DSL
--    (col types are explicit via TypeApplications)
ghci> df2 = D.derive "rooms_per_household" (total_rooms / households) df0

-- 4) Group + aggregate
ghci> let grouped   = D.groupBy ["ocean_proximity"] df0
ghci> let summary   =
         D.aggregate
             [ F.maximum median_house_value \`F.as\` "max_house_value"]
             grouped
ghci> D.take 5 summary
-----------------------------------------
index | ocean_proximity | max_house_value
------|-----------------|----------------
 Int  |      Text       |     Double
------|-----------------|----------------
0     | <1H OCEAN       | 500001.0
1     | INLAND          | 500001.0
2     | ISLAND          | 450000.0
3     | NEAR BAY        | 500001.0
4     | NEAR OCEAN      | 500001.0
@

== Simple operations (cheat sheet)
Most users only need a handful of verbs:

__I/O__

  * @D.readCsv :: FilePath -> IO DataFrame@
  * @D.readTsv :: FilePath -> IO DataFrame@
  * @D.writeCsv :: FilePath -> DataFrame -> IO ()@
  * @D.readParquet :: FilePath -> IO DataFrame@

__Exploration__

  * @D.take :: Int -> DataFrame -> DataFrame@
  * @D.takeLast :: Int -> DataFrame -> DataFrame@
  * @D.describeColumns :: DataFrame -> DataFrame@
  * @D.summarize :: DataFrame -> DataFrame@

__Row ops__

  * @D.filterWhere :: Expr Bool -> DataFrame -> DataFrame@
  * @D.sortBy :: SortOrder -> [Text] -> DataFrame -> DataFrame@

__Column ops__

  * @D.select :: [Text] -> DataFrame -> DataFrame@
  * @D.exclude :: [Text] -> DataFrame -> DataFrame@
  * @D.rename :: [(Text,Text)] -> DataFrame -> DataFrame@
  * @D.derive :: Text -> D.Expr a -> DataFrame -> DataFrame@

__Group & aggregate__

  * @D.groupBy :: [Text] -> DataFrame -> GroupedDataFrame@
  * @D.aggregate :: [(Text, F.UExpr)] -> GroupedDataFrame -> DataFrame@

__Joins__

  * @D.innerJoin \/ D.leftJoin \/ D.rightJoin \/ D.fullJoin@

== Expression DSL (F.*) at a glance
Columns (typed):

@
F.col \@Text   "ocean_proximity"
F.col \@Double "total_rooms"
F.lit \@Double 1.0
@

Math & comparisons (overloaded by type):

@
(+), (-), (*), (/), abs, log, exp, round
(F.eq), (F.gt), (F.geq), (F.lt), (F.leq)
@

Aggregations (for D.'aggregate'):

@
F.count \@a (F.col \@a "c")
F.sum   \@Double (F.col \@Double "x")
F.mean  \@Double (F.col \@Double "x")
F.min   \@t (F.col \@t "x")
F.max   \@t (F.col \@t "x")
@

== REPL power-tool: ':exposeColumns'

Use @:exposeColumns <df>@ in GHCi/IHaskell to turn each column of a bound 'DataFrame'
into a local binding with the same (mangled if needed) name and the column's concrete
vector type. This is great for quick ad-hoc analysis, plotting, or hand-rolled checks.

@
-- Suppose df has columns: "passengers" :: Int, "fare" :: Double, "payment" :: Text
ghci> :set -XTemplateHaskell
ghci> :exposeColumns df

-- Now you have in scope:
ghci> :type passengers
passengers :: Expr Int

ghci> :type fare
fare :: Expr Double

ghci> :type payment
payment :: Expr Text

-- You can use them directly:
ghci> D.derive "fare_with_tip" (fare * F.lit 1.2)
@

Notes:

* Name mangling: spaces and non-identifier characters are replaced (e.g. @"trip id"@ -> @trip_id@).
* Optional/nullable columns are exposed as @Expr (Maybe a)@.
-}
module DataFrame (
    -- * Core data structures
    module Dataframe,
    module Column,
    module Row,
    module Expression,

    -- * Core dataframe operations
    module Core,

    -- * I/O
    module CSV,
    module Parquet,

    -- * Operations
    module Subset,
    module Transformations,
    module Aggregation,
    module Sorting,
    module Merge,
    module Join,
    module Statistics,

    -- * Errors
    module Errors,

    -- * Plotting
    module Plot,

    -- * Convenience functions
    (|>),
)
where

import DataFrame.Display.Terminal.Plot as Plot
import DataFrame.Errors as Errors
import DataFrame.IO.CSV as CSV (
    ReadOptions (..),
    defaultReadOptions,
    readCsv,
    readCsvWithOpts,
    readSeparated,
    readTsv,
    writeCsv,
    writeSeparated,
 )
import DataFrame.IO.Parquet as Parquet (readParquet)
import DataFrame.Internal.Column as Column (
    Column,
    fromList,
    fromUnboxedVector,
    fromVector,
    hasElemType,
    hasMissing,
    isNumeric,
    toList,
    toVector,
 )
import DataFrame.Internal.DataFrame as Dataframe (
    DataFrame,
    GroupedDataFrame,
    columnAsVector,
    empty,
    toMarkdownTable,
    toMatrix,
 )
import DataFrame.Internal.Expression as Expression (Expr)
import DataFrame.Internal.Row as Row (Row, fromAny, toAny, toRowList)
import DataFrame.Operations.Aggregation as Aggregation (
    aggregate,
    distinct,
    groupBy,
 )
import DataFrame.Operations.Core as Core hiding (
    ColumnInfo (..),
    nulls,
    partiallyParsed,
    renameSafe,
 )
import DataFrame.Operations.Join as Join
import DataFrame.Operations.Merge as Merge
import DataFrame.Operations.Sorting as Sorting
import DataFrame.Operations.Statistics as Statistics (
    correlation,
    frequencies,
    interQuartileRange,
    mean,
    median,
    skewness,
    standardDeviation,
    sum,
    summarize,
    variance,
 )
import DataFrame.Operations.Subset as Subset (
    SelectionCriteria,
    byIndexRange,
    byName,
    byNameProperty,
    byNameRange,
    byProperty,
    cube,
    drop,
    dropLast,
    exclude,
    filter,
    filterAllJust,
    filterBy,
    filterJust,
    filterNothing,
    filterWhere,
    kFolds,
    randomSplit,
    range,
    sample,
    select,
    selectBy,
    take,
    takeLast,
 )
import DataFrame.Operations.Transformations as Transformations

import Data.Function ((&))

|> :: a -> (a -> b) -> b
(|>) = a -> (a -> b) -> b
forall a b. a -> (a -> b) -> b
(&)