{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Internal.Row where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Merge as VA
import qualified Data.Vector.Generic as VG
import qualified Data.Vector.Unboxed as VU

import Control.Exception (throw)
import Control.Monad.ST (runST)
import Data.Function (on)
import Data.Maybe (fromMaybe)
import Data.Type.Equality (TestEquality (..))
import Data.Typeable (type (:~:) (..))
import DataFrame.Errors (DataFrameException (..))
import DataFrame.Internal.Column
import DataFrame.Internal.DataFrame
import Text.ParserCombinators.ReadPrec (ReadPrec)
import Text.Read (
    Lexeme (Ident),
    lexP,
    parens,
    readListPrec,
    readListPrecDefault,
    readPrec,
 )
import Type.Reflection (typeOf, typeRep)

data Any where
    Value :: (Columnable a) => a -> Any

instance Eq Any where
    (==) :: Any -> Any -> Bool
    (Value a
a) == :: Any -> Any -> Bool
== (Value a
b) = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
False (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ do
        a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
a) (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
b)
        Bool -> Maybe Bool
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ a
a a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
a
b

instance Ord Any where
    (<=) :: Any -> Any -> Bool
    (Value a
a) <= :: Any -> Any -> Bool
<= (Value a
b) = Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
False (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ do
        a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
a) (a -> TypeRep a
forall a. Typeable a => a -> TypeRep a
typeOf a
b)
        Bool -> Maybe Bool
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ a
a a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= a
a
b

instance Show Any where
    show :: Any -> String
    show :: Any -> [Char]
show (Value a
a) = Text -> [Char]
T.unpack (a -> Text
forall a. Columnable a => a -> Text
showValue a
a)

showValue :: forall a. (Columnable a) => a -> T.Text
showValue :: forall a. Columnable a => a -> Text
showValue a
v = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
    Just a :~: Text
Refl -> a
Text
v
    Maybe (a :~: Text)
Nothing -> case TypeRep a -> TypeRep [Char] -> Maybe (a :~: [Char])
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @String) of
        Just a :~: [Char]
Refl -> [Char] -> Text
T.pack a
[Char]
v
        Maybe (a :~: [Char])
Nothing -> ([Char] -> Text
T.pack ([Char] -> Text) -> (a -> [Char]) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> [Char]
forall a. Show a => a -> [Char]
show) a
v

instance Read Any where
    readListPrec :: ReadPrec [Any]
    readListPrec :: ReadPrec [Any]
readListPrec = ReadPrec [Any]
forall a. Read a => ReadPrec [a]
readListPrecDefault

    readPrec :: ReadPrec Any
    readPrec :: ReadPrec Any
readPrec = ReadPrec Any -> ReadPrec Any
forall a. ReadPrec a -> ReadPrec a
parens (ReadPrec Any -> ReadPrec Any) -> ReadPrec Any -> ReadPrec Any
forall a b. (a -> b) -> a -> b
$ do
        Ident [Char]
"Value" <- ReadPrec Lexeme
lexP
        ReadPrec Any
forall a. Read a => ReadPrec a
readPrec

-- | Wraps a value into an \Any\ type. This helps up represent rows as heterogenous lists.
toAny :: forall a. (Columnable a) => a -> Any
toAny :: forall a. Columnable a => a -> Any
toAny = a -> Any
forall a. Columnable a => a -> Any
Value

-- | Unwraps a value from an \Any\ type.
fromAny :: forall a. (Columnable a) => Any -> Maybe a
fromAny :: forall a. Columnable a => Any -> Maybe a
fromAny (Value (a
v :: b)) = do
    a :~: a
Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b)
    a -> Maybe a
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
a
v

type Row = V.Vector Any

(!?) :: [a] -> Int -> Maybe a
!? :: forall a. [a] -> Int -> Maybe a
(!?) [] Int
_ = Maybe a
forall a. Maybe a
Nothing
(!?) (a
x : [a]
_) Int
0 = a -> Maybe a
forall a. a -> Maybe a
Just a
x
(!?) (a
x : [a]
xs) Int
n = [a] -> Int -> Maybe a
forall a. [a] -> Int -> Maybe a
(!?) [a]
xs (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)

mkColumnFromRow :: Int -> [[Any]] -> Column
mkColumnFromRow :: Int -> [[Any]] -> Column
mkColumnFromRow Int
i [[Any]]
rows = case [[Any]]
rows of
    [] -> [Text] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([] :: [T.Text])
    ([Any]
row : [[Any]]
_) -> case [Any]
row [Any] -> Int -> Maybe Any
forall a. [a] -> Int -> Maybe a
!? Int
i of
        Maybe Any
Nothing -> [Text] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([] :: [T.Text])
        Just (Value (a
v :: a)) -> [a] -> Column
forall a.
(Columnable a, ColumnifyRep (KindOf a) a) =>
[a] -> Column
fromList ([a] -> Column) -> [a] -> Column
forall a b. (a -> b) -> a -> b
$ [a] -> [a]
forall a. [a] -> [a]
reverse ([a] -> [a]) -> [a] -> [a]
forall a b. (a -> b) -> a -> b
$ ([a] -> [Any] -> [a]) -> [a] -> [[Any]] -> [a]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
L.foldl' [a] -> [Any] -> [a]
addToList [a
v] (Int -> [[Any]] -> [[Any]]
forall a. Int -> [a] -> [a]
drop Int
1 [[Any]]
rows)
          where
            addToList :: [a] -> [Any] -> [a]
addToList [a]
acc [Any]
r = case [Any]
r [Any] -> Int -> Maybe Any
forall a. [a] -> Int -> Maybe a
!? Int
i of
                Maybe Any
Nothing -> [a]
acc
                Just (Value (a
v' :: b)) -> case TypeRep a -> TypeRep a -> Maybe (a :~: a)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @b) of
                    Maybe (a :~: a)
Nothing -> [a]
acc
                    Just a :~: a
Refl -> a
v' a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
[a]
acc

toRowList :: DataFrame -> [Row]
toRowList :: DataFrame -> [Row]
toRowList DataFrame
df =
    let
        nameSet :: Set Text
nameSet =
            [Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList (((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)))
     in
        (Int -> Row) -> [Int] -> [Row]
forall a b. (a -> b) -> [a] -> [b]
map (DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
nameSet) [Int
0 .. ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)]

toRowVector :: [T.Text] -> DataFrame -> V.Vector Row
toRowVector :: [Text] -> DataFrame -> Vector Row
toRowVector [Text]
names DataFrame
df =
    let
        nameSet :: Set Text
nameSet = [Text] -> Set Text
forall a. Ord a => [a] -> Set a
S.fromList [Text]
names
     in
        Int -> (Int -> Row) -> Vector Row
forall a. Int -> (Int -> a) -> Vector a
V.generate ((Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df)) (DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
nameSet)

mkRowFromArgs :: [T.Text] -> DataFrame -> Int -> Row
mkRowFromArgs :: [Text] -> DataFrame -> Int -> Row
mkRowFromArgs [Text]
names DataFrame
df Int
i = (Text -> Any) -> Vector Text -> Row
forall a b. (a -> b) -> Vector a -> Vector b
V.map Text -> Any
get ([Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text]
names)
  where
    get :: Text -> Any
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
        Maybe Column
Nothing ->
            DataFrameException -> Any
forall a e. Exception e => e -> a
throw (DataFrameException -> Any) -> DataFrameException -> Any
forall a b. (a -> b) -> a -> b
$
                Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException
                    Text
name
                    Text
"[INTERNAL] mkRowFromArgs"
                    (Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)
        Just (BoxedColumn Vector a
column) -> a -> Any
forall a. Columnable a => a -> Any
toAny (Vector a
column Vector a -> Int -> a
forall a. Vector a -> Int -> a
V.! Int
i)
        Just (UnboxedColumn Vector a
column) -> a -> Any
forall a. Columnable a => a -> Any
toAny (Vector a
column Vector a -> Int -> a
forall a. Unbox a => Vector a -> Int -> a
VU.! Int
i)
        Just (OptionalColumn Vector (Maybe a)
column) -> Maybe a -> Any
forall a. Columnable a => a -> Any
toAny (Vector (Maybe a)
column Vector (Maybe a) -> Int -> Maybe a
forall a. Vector a -> Int -> a
V.! Int
i)

mkRowRep :: DataFrame -> S.Set T.Text -> Int -> Row
mkRowRep :: DataFrame -> Set Text -> Int -> Row
mkRowRep DataFrame
df Set Text
names Int
i = Int -> (Int -> Any) -> Row
forall a. Int -> (Int -> a) -> Vector a
V.generate (Set Text -> Int
forall a. Set a -> Int
S.size Set Text
names) (\Int
index -> Text -> Any
get (Vector Text
names' Vector Text -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
index))
  where
    inOrderIndexes :: [Text]
inOrderIndexes = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$ ((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Int -> Int -> Ordering)
-> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Text, Int) -> Int
forall a b. (a, b) -> b
snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$ Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)
    names' :: Vector Text
names' = [Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text
n | Text
n <- [Text]
inOrderIndexes, Text -> Set Text -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member Text
n Set Text
names]
    throwError :: Text -> Any
throwError Text
name =
        [Char] -> Any
forall a. HasCallStack => [Char] -> a
error ([Char] -> Any) -> [Char] -> Any
forall a b. (a -> b) -> a -> b
$
            [Char]
"Column "
                [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
name
                [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
" has less items than "
                [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"the other columns at index "
                [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
i
    get :: Text -> Any
get Text
name = case Text -> DataFrame -> Maybe Column
getColumn Text
name DataFrame
df of
        Just (BoxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
            Just a
e -> a -> Any
forall a. Columnable a => a -> Any
toAny a
e
            Maybe a
Nothing -> Text -> Any
throwError Text
name
        Just (OptionalColumn Vector (Maybe a)
c) -> case Vector (Maybe a)
c Vector (Maybe a) -> Int -> Maybe (Maybe a)
forall a. Vector a -> Int -> Maybe a
V.!? Int
i of
            Just Maybe a
e -> Maybe a -> Any
forall a. Columnable a => a -> Any
toAny Maybe a
e
            Maybe (Maybe a)
Nothing -> Text -> Any
throwError Text
name
        Just (UnboxedColumn Vector a
c) -> case Vector a
c Vector a -> Int -> Maybe a
forall a. Unbox a => Vector a -> Int -> Maybe a
VU.!? Int
i of
            Just a
e -> a -> Any
forall a. Columnable a => a -> Any
toAny a
e
            Maybe a
Nothing -> Text -> Any
throwError Text
name
        Maybe Column
Nothing ->
            DataFrameException -> Any
forall a e. Exception e => e -> a
throw (DataFrameException -> Any) -> DataFrameException -> Any
forall a b. (a -> b) -> a -> b
$ Text -> Text -> [Text] -> DataFrameException
ColumnNotFoundException Text
name Text
"mkRowRep" (Map Text Int -> [Text]
forall k a. Map k a -> [k]
M.keys (Map Text Int -> [Text]) -> Map Text Int -> [Text]
forall a b. (a -> b) -> a -> b
$ DataFrame -> Map Text Int
columnIndices DataFrame
df)

sortedIndexes' :: Bool -> V.Vector Row -> VU.Vector Int
sortedIndexes' :: Bool -> Vector Row -> Vector Int
sortedIndexes' Bool
asc Vector Row
rows = (forall s. ST s (Vector Int)) -> Vector Int
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s (Vector Int)) -> Vector Int)
-> (forall s. ST s (Vector Int)) -> Vector Int
forall a b. (a -> b) -> a -> b
$ do
    MVector s (Int, Row)
withIndexes <- Vector (Int, Row)
-> ST s (Mutable Vector (PrimState (ST s)) (Int, Row))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
v a -> m (Mutable v (PrimState m) a)
VG.thaw (Vector Row -> Vector (Int, Row)
forall a. Vector a -> Vector (Int, a)
V.indexed Vector Row
rows)
    Comparison (Int, Row)
-> MVector (PrimState (ST s)) (Int, Row) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
VA.sortBy ((if Bool
asc then Row -> Row -> Ordering
forall a. Ord a => a -> a -> Ordering
compare else (Row -> Row -> Ordering) -> Row -> Row -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip Row -> Row -> Ordering
forall a. Ord a => a -> a -> Ordering
compare) (Row -> Row -> Ordering)
-> ((Int, Row) -> Row) -> Comparison (Int, Row)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Int, Row) -> Row
forall a b. (a, b) -> b
snd) MVector s (Int, Row)
MVector (PrimState (ST s)) (Int, Row)
withIndexes
    Vector (Int, Row)
sorted <- Mutable Vector (PrimState (ST s)) (Int, Row)
-> ST s (Vector (Int, Row))
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
VG.unsafeFreeze Mutable Vector (PrimState (ST s)) (Int, Row)
MVector s (Int, Row)
withIndexes
    Vector Int -> ST s (Vector Int)
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Int -> ST s (Vector Int))
-> Vector Int -> ST s (Vector Int)
forall a b. (a -> b) -> a -> b
$ Int -> (Int -> Int) -> Vector Int
forall a. Unbox a => Int -> (Int -> a) -> Vector a
VU.generate (Vector Row -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length Vector Row
rows) (\Int
i -> (Int, Row) -> Int
forall a b. (a, b) -> a
fst (Vector (Int, Row)
sorted Vector (Int, Row) -> Int -> (Int, Row)
forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
v a -> Int -> a
VG.! Int
i))