{-# LANGUAGE InstanceSigs #-}

module DataFrame.Operations.Merge where

import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified DataFrame.Internal.Column as D
import qualified DataFrame.Internal.DataFrame as D
import qualified DataFrame.Operations.Core as D

import Data.Maybe

-- | Vertically merge two dataframes using shared columns.
--
-- The result is built by folding over the union of both frames' column
-- names (names of the left frame first, then names that only occur in the
-- right frame), starting from 'D.empty' and adding one column per name.
instance Semigroup D.DataFrame where
    (<>) :: D.DataFrame -> D.DataFrame -> D.DataFrame
    (<>) a b =
        L.foldl' addColumn D.empty (D.columnNames a `L.union` D.columnNames b)
      where
        -- First component of each frame's dimensions, computed once instead
        -- of once per column.
        -- NOTE(review): presumably the row count; confirm against
        -- 'D.dimensions'.
        sizeA = fst (D.dimensions a)
        sizeB = fst (D.dimensions b)

        -- Insert the column under @name@ when one is available; otherwise
        -- leave the accumulated frame untouched.
        insertWhenPresent name df mCol =
            fromMaybe df (put <$> mCol)
          where
            put col = D.insertColumn name col df

        -- Add the merged column called @name@ to the frame built so far.
        addColumn df name
            -- Both inputs report size 0: nothing is added.
            | sizeA == 0 && sizeB == 0 = df
            -- Only one side has data: take its column as is, if it has one.
            | sizeA == 0 = insertWhenPresent name df (D.getColumn name b)
            | sizeB == 0 = insertWhenPresent name df (D.getColumn name a)
            | otherwise =
                case (D.getColumn name a, D.getColumn name b) of
                    -- Neither side has the column: insert an empty Text column.
                    (Nothing, Nothing) ->
                        D.insertColumn name (D.fromList ([] :: [T.Text])) df
                    -- Only the left side has it: expand by the right side's size.
                    -- NOTE(review): assumes 'D.expandColumn' pads the end of the
                    -- column; confirm.
                    (Just colA, Nothing) ->
                        D.insertColumn name (D.expandColumn sizeB colA) df
                    -- Only the right side has it: left-expand by the left side's size.
                    -- NOTE(review): assumes 'D.leftExpandColumn' pads the front of
                    -- the column; confirm.
                    (Nothing, Just colB) ->
                        D.insertColumn name (D.leftExpandColumn sizeA colB) df
                    -- Both sides have it: concatenate. When 'D.concatColumns'
                    -- yields Nothing the column is skipped rather than failing.
                    (Just colA, Just colB) ->
                        insertWhenPresent name df (D.concatColumns colA colB)

-- | The identity element for dataframe merging is 'D.empty'.
instance Monoid D.DataFrame where
    mempty :: D.DataFrame
    mempty = D.empty

-- | Add two dataframes side by side/horizontally.
--
-- Every column of the right frame is inserted into the left frame under its
-- own name, using 'D.fold' over the right frame's column names with the left
-- frame as the starting value.
--
-- NOTE(review): what happens when both frames share a column name is decided
-- by 'D.insertColumn' — confirm whether it replaces the existing column.
(|||) :: D.DataFrame -> D.DataFrame -> D.DataFrame
(|||) a b = D.fold attach (D.columnNames b) a
  where
    -- 'D.unsafeGetColumn' is only ever called with names taken from
    -- @D.columnNames b@, so the lookup targets columns reported by @b@ itself.
    attach name acc = D.insertColumn name (D.unsafeGetColumn name b) acc