{-# LANGUAGE InstanceSigs #-}
module DataFrame.Operations.Merge where
import qualified Data.List as L
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified DataFrame.Internal.Column as D
import qualified DataFrame.Internal.DataFrame as D
import qualified DataFrame.Operations.Core as D
import Data.Maybe
-- | Combine two data frames column-by-column over the union of their
-- column names (names of the left frame first, then names that only occur
-- in the right frame, as produced by 'L.union').
--
-- For every name in that union:
--
-- * if both frames have a zero first dimension, nothing is added;
-- * if only one frame has a zero first dimension, the column is taken from
--   the other frame (and skipped when the other frame lacks it);
-- * otherwise a column present in both frames is joined with
--   'D.concatColumns', and a column present in only one frame is padded
--   with 'D.expandColumn' / 'D.leftExpandColumn' by the size of the frame
--   that lacks it.
--
-- NOTE(review): when 'D.concatColumns' returns 'Nothing' the column is
-- silently left out of the result. This mirrors the previous behaviour;
-- confirm that dropping the column is really what callers expect.
instance Semigroup D.DataFrame where
    (<>) :: D.DataFrame -> D.DataFrame -> D.DataFrame
    (<>) a b = L.foldl' addColumn D.empty allNames
      where
        -- Every column name that occurs in either operand, without duplicates.
        allNames = D.columnNames a `L.union` D.columnNames b

        -- First component of 'D.dimensions'; presumably the row count
        -- (it is used as a padding length below) -- TODO confirm.
        rowsA = fst (D.dimensions a)
        rowsB = fst (D.dimensions b)

        -- Insert @name@ into @df@ when a column is available, otherwise
        -- return @df@ unchanged.
        insertOr df name = maybe df (\col -> D.insertColumn name col df)

        -- Fold step: extend the accumulated frame with the merged column
        -- called @name@.
        addColumn df name
            | rowsA == 0 && rowsB == 0 = df
            | rowsA == 0 = insertOr df name (D.getColumn name b)
            | rowsB == 0 = insertOr df name (D.getColumn name a)
            | otherwise =
                case (D.getColumn name a, D.getColumn name b) of
                    -- Not reachable for names drawn from 'allNames'; kept so
                    -- the match stays total and matches the old behaviour.
                    (Nothing, Nothing) ->
                        D.insertColumn name (D.fromList ([] :: [T.Text])) df
                    -- Only the left frame has the column: pad it by the
                    -- size of the right frame.
                    (Just colA, Nothing) ->
                        D.insertColumn name (D.expandColumn rowsB colA) df
                    -- Only the right frame has the column: left-pad it by
                    -- the size of the left frame.
                    (Nothing, Just colB) ->
                        D.insertColumn name (D.leftExpandColumn rowsA colB) df
                    -- Both frames have the column: join them.
                    (Just colA, Just colB) ->
                        insertOr df name (D.concatColumns colA colB)
-- | 'Monoid' instance for data frames: the neutral element is 'D.empty'.
instance Monoid D.DataFrame where
    mempty :: D.DataFrame
    mempty = D.empty
-- | Side-by-side merge: starting from the left frame, insert every column
-- of the right frame under its own name via 'D.insertColumn'.
--
-- NOTE(review): what happens when both frames share a column name depends
-- on 'D.insertColumn' -- confirm whether the right-hand column replaces
-- the left-hand one.
(|||) :: D.DataFrame -> D.DataFrame -> D.DataFrame
left ||| right = D.fold copyColumn (D.columnNames right) left
  where
    -- Copy the column called @colName@ out of the right frame into @target@.
    -- The names passed in come from @D.columnNames right@, so the unsafe
    -- lookup is expected to find each of them.
    copyColumn colName target =
        D.insertColumn colName (D.unsafeGetColumn colName right) target