{-# LANGUAGE InstanceSigs #-}

module DataFrame.Operations.Merge where

import qualified Data.List as L
import qualified Data.Text as T
import qualified DataFrame.Internal.Column as D
import qualified DataFrame.Internal.DataFrame as D
import qualified DataFrame.Operations.Core as D

import Data.Maybe

{- | Vertically merge two dataframes using shared columns.
Columns that exist in only one dataframe are padded with Nothing.

The result is built column by column over the union of both frames' column
names (names of the left operand first, then names only present in the right).

Special cases, decided by the row counts of the operands:

* both operands have zero rows: no column is added, the result is 'D.empty';
* exactly one operand has zero rows: only the columns of the /other/ operand
  are copied over, unchanged;
* otherwise each column is concatenated, or expanded to the combined row
  count when it is missing from one side.
-}
instance Semigroup D.DataFrame where
    (<>) :: D.DataFrame -> D.DataFrame -> D.DataFrame
    (<>) a b =
        L.foldl' addColumn D.empty (D.columnNames a `L.union` D.columnNames b)
      where
        -- Row counts do not change during the fold, so compute them once.
        numRowsA = fst (D.dimensions a)
        numRowsB = fst (D.dimensions b)
        sumRows = numRowsA + numRowsB

        -- Copy column @name@ from @src@ into @df@; leave @df@ untouched when
        -- @src@ has no such column.
        copyFrom :: D.DataFrame -> T.Text -> D.DataFrame -> D.DataFrame
        copyFrom src name df =
            maybe df (\col -> D.insertColumn name col df) (D.getColumn name src)

        -- Add the merged version of column @name@ to the accumulated frame.
        addColumn :: D.DataFrame -> T.Text -> D.DataFrame
        addColumn df name
            | numRowsA == 0 && numRowsB == 0 = df
            | numRowsA == 0 = copyFrom b name df
            | numRowsB == 0 = copyFrom a name df
            | otherwise =
                case (D.getColumn name a, D.getColumn name b) of
                    (Nothing, Nothing) ->
                        -- N.B. this case should never happen, because we're dealing with columns coming from
                        -- the union of column names of both dataframes. Nothing + Nothing would mean the column
                        -- wasn't in either dataframe, which shouldn't happen.
                        D.insertColumn name (D.fromList ([] :: [T.Text])) df
                    (Just colA, Nothing) ->
                        -- Only in the left frame: expand to the combined row count
                        -- (presumably padding after the existing values -- confirm in expandColumn).
                        D.insertColumn name (D.expandColumn sumRows colA) df
                    (Nothing, Just colB) ->
                        -- Only in the right frame: expand to the combined row count
                        -- (presumably padding before the existing values -- confirm in leftExpandColumn).
                        D.insertColumn name (D.leftExpandColumn sumRows colB) df
                    (Just colA, Just colB) ->
                        D.insertColumn name (D.concatColumnsEither colA colB) df

{- | The neutral element for vertical merging is 'D.empty'.

When one operand of '<>' has zero rows, the merge copies only the columns of
the other operand, so merging with 'mempty' returns the other frame's columns.
-}
instance Monoid D.DataFrame where
    mempty :: D.DataFrame
    mempty = D.empty

{- | Add two dataframes side by side/horizontally.

Starting from the left frame @a@, every column of the right frame @b@ is
inserted under its own name via 'D.insertColumn'.

NOTE(review): what happens when both frames share a column name, or have
different row counts, is decided by 'D.insertColumn' -- confirm there.
-}
(|||) :: D.DataFrame -> D.DataFrame -> D.DataFrame
(|||) a b =
    D.fold
        -- The lookup cannot miss: @name@ is drawn from @b@'s own column names.
        (\name acc -> D.insertColumn name (D.unsafeGetColumn name b) acc)
        (D.columnNames b)
        a