{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE ExplicitNamespaces #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE InstanceSigs #-} module DataFrame.Internal.Row where import qualified Data.List as L import qualified Data.Map as M import qualified Data.Set as S import qualified Data.Text as T import qualified Data.Vector as V import qualified Data.Vector.Generic as VG import qualified Data.Vector.Unboxed as VU import qualified Data.Vector.Algorithms.Merge as VA import Control.Exception (throw) import Control.Monad.ST (runST) import DataFrame.Errors (DataFrameException(..)) import DataFrame.Internal.Column import DataFrame.Internal.DataFrame import DataFrame.Internal.Types import Data.Function (on) import Data.Maybe (fromMaybe) import Data.Typeable (Typeable, type (:~:) (..)) import Data.Word ( Word8, Word16, Word32, Word64 ) import Type.Reflection (TypeRep, typeOf, typeRep) import Data.Type.Equality (TestEquality(..)) data RowValue where Value :: (Columnable' a) => a -> RowValue instance Eq RowValue where (==) :: RowValue -> RowValue -> Bool (Value a a) == :: RowValue -> RowValue -> Bool == (Value a b) = Bool -> Maybe Bool -> Bool forall a. a -> Maybe a -> a fromMaybe Bool False (Maybe Bool -> Bool) -> Maybe Bool -> Bool forall a b. (a -> b) -> a -> b $ do a :~: a Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a a) (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a b) Bool -> Maybe Bool forall a. a -> Maybe a forall (m :: * -> *) a. Monad m => a -> m a return (Bool -> Maybe Bool) -> Bool -> Maybe Bool forall a b. (a -> b) -> a -> b $ a a a -> a -> Bool forall a. Eq a => a -> a -> Bool == a a b instance Ord RowValue where (<=) :: RowValue -> RowValue -> Bool (Value a a) <= :: RowValue -> RowValue -> Bool <= (Value a b) = Bool -> Maybe Bool -> Bool forall a. a -> Maybe a -> a fromMaybe Bool False (Maybe Bool -> Bool) -> Maybe Bool -> Bool forall a b. (a -> b) -> a -> b $ do a :~: a Refl <- TypeRep a -> TypeRep a -> Maybe (a :~: a) forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b) forall {k} (f :: k -> *) (a :: k) (b :: k). TestEquality f => f a -> f b -> Maybe (a :~: b) testEquality (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a a) (a -> TypeRep a forall a. Typeable a => a -> TypeRep a typeOf a b) Bool -> Maybe Bool forall a. a -> Maybe a forall (m :: * -> *) a. Monad m => a -> m a return (Bool -> Maybe Bool) -> Bool -> Maybe Bool forall a b. (a -> b) -> a -> b $ a a a -> a -> Bool forall a. Ord a => a -> a -> Bool <= a a b instance Show RowValue where show :: RowValue -> String show :: RowValue -> [Char] show (Value a a) = a -> [Char] forall a. Show a => a -> [Char] show a a toRowValue :: forall a . (Columnable' a) => a -> RowValue toRowValue :: forall a. Columnable' a => a -> RowValue toRowValue = a -> RowValue forall a. Columnable' a => a -> RowValue Value type Row = V.Vector RowValue toRowList :: [T.Text] -> DataFrame -> [Row] toRowList :: [Text] -> DataFrame -> [Row] toRowList [Text] names DataFrame df = let nameSet :: Set Text nameSet = [Text] -> Set Text forall a. Ord a => [a] -> Set a S.fromList [Text] names in (Int -> Row) -> [Int] -> [Row] forall a b. (a -> b) -> [a] -> [b] map (DataFrame -> Set Text -> Int -> Row mkRowRep DataFrame df Set Text nameSet) [Int 0..((Int, Int) -> Int forall a b. (a, b) -> a fst (DataFrame -> (Int, Int) dataframeDimensions DataFrame df) Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1)] toRowVector :: [T.Text] -> DataFrame -> V.Vector Row toRowVector :: [Text] -> DataFrame -> Vector Row toRowVector [Text] names DataFrame df = let nameSet :: Set Text nameSet = [Text] -> Set Text forall a. Ord a => [a] -> Set a S.fromList [Text] names in Int -> (Int -> Row) -> Vector Row forall a. Int -> (Int -> a) -> Vector a V.generate ((Int, Int) -> Int forall a b. (a, b) -> a fst (DataFrame -> (Int, Int) dataframeDimensions DataFrame df)) (DataFrame -> Set Text -> Int -> Row mkRowRep DataFrame df Set Text nameSet) mkRowFromArgs :: [T.Text] -> DataFrame -> Int -> Row mkRowFromArgs :: [Text] -> DataFrame -> Int -> Row mkRowFromArgs [Text] names DataFrame df Int i = (Text -> RowValue) -> Vector Text -> Row forall a b. (a -> b) -> Vector a -> Vector b V.map Text -> RowValue get ([Text] -> Vector Text forall a. [a] -> Vector a V.fromList [Text] names) where get :: Text -> RowValue get Text name = case Text -> DataFrame -> Maybe Column getColumn Text name DataFrame df of Maybe Column Nothing -> DataFrameException -> RowValue forall a e. Exception e => e -> a throw (DataFrameException -> RowValue) -> DataFrameException -> RowValue forall a b. (a -> b) -> a -> b $ Text -> Text -> [Text] -> DataFrameException ColumnNotFoundException Text name Text "[INTERNAL] mkRowFromArgs" (((Text, Int) -> Text) -> [(Text, Int)] -> [Text] forall a b. (a -> b) -> [a] -> [b] map (Text, Int) -> Text forall a b. (a, b) -> a fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text] forall a b. (a -> b) -> a -> b $ Map Text Int -> [(Text, Int)] forall k a. Map k a -> [(k, a)] M.toList (Map Text Int -> [(Text, Int)]) -> Map Text Int -> [(Text, Int)] forall a b. (a -> b) -> a -> b $ DataFrame -> Map Text Int columnIndices DataFrame df) Just (BoxedColumn Vector a column) -> a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue (Vector a column Vector a -> Int -> a forall a. Vector a -> Int -> a V.! Int i) Just (UnboxedColumn Vector a column) -> a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue (Vector a column Vector a -> Int -> a forall a. Unbox a => Vector a -> Int -> a VU.! Int i) Just (OptionalColumn Vector (Maybe a) column) -> Maybe a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue (Vector (Maybe a) column Vector (Maybe a) -> Int -> Maybe a forall a. Vector a -> Int -> a V.! Int i) mkRowRep :: DataFrame -> S.Set T.Text -> Int -> Row mkRowRep :: DataFrame -> Set Text -> Int -> Row mkRowRep DataFrame df Set Text names Int i = Int -> (Int -> RowValue) -> Row forall a. Int -> (Int -> a) -> Vector a V.generate (Set Text -> Int forall a. Set a -> Int S.size Set Text names) (\Int index -> Text -> RowValue get (Vector Text names' Vector Text -> Int -> Text forall a. Vector a -> Int -> a V.! Int index)) where inOrderIndexes :: [Text] inOrderIndexes = ((Text, Int) -> Text) -> [(Text, Int)] -> [Text] forall a b. (a -> b) -> [a] -> [b] map (Text, Int) -> Text forall a b. (a, b) -> a fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text] forall a b. (a -> b) -> a -> b $ ((Text, Int) -> (Text, Int) -> Ordering) -> [(Text, Int)] -> [(Text, Int)] forall a. (a -> a -> Ordering) -> [a] -> [a] L.sortBy (Int -> Int -> Ordering forall a. Ord a => a -> a -> Ordering compare (Int -> Int -> Ordering) -> ((Text, Int) -> Int) -> (Text, Int) -> (Text, Int) -> Ordering forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c `on` (Text, Int) -> Int forall a b. (a, b) -> b snd) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)] forall a b. (a -> b) -> a -> b $ Map Text Int -> [(Text, Int)] forall k a. Map k a -> [(k, a)] M.toList (DataFrame -> Map Text Int columnIndices DataFrame df) names' :: Vector Text names' = [Text] -> Vector Text forall a. [a] -> Vector a V.fromList [Text n | Text n <- [Text] inOrderIndexes, Text -> Set Text -> Bool forall a. Ord a => a -> Set a -> Bool S.member Text n Set Text names] throwError :: Text -> RowValue throwError Text name = [Char] -> RowValue forall a. HasCallStack => [Char] -> a error ([Char] -> RowValue) -> [Char] -> RowValue forall a b. (a -> b) -> a -> b $ [Char] "Column " [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ Text -> [Char] T.unpack Text name [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ [Char] " has less items than " [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ [Char] "the other columns at index " [Char] -> ShowS forall a. [a] -> [a] -> [a] ++ Int -> [Char] forall a. Show a => a -> [Char] show Int i get :: Text -> RowValue get Text name = case Text -> DataFrame -> Maybe Column getColumn Text name DataFrame df of Just (BoxedColumn Vector a c) -> case Vector a c Vector a -> Int -> Maybe a forall a. Vector a -> Int -> Maybe a V.!? Int i of Just a e -> a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue a e Maybe a Nothing -> Text -> RowValue throwError Text name Just (OptionalColumn Vector (Maybe a) c) -> case Vector (Maybe a) c Vector (Maybe a) -> Int -> Maybe (Maybe a) forall a. Vector a -> Int -> Maybe a V.!? Int i of Just Maybe a e -> Maybe a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue Maybe a e Maybe (Maybe a) Nothing -> Text -> RowValue throwError Text name Just (UnboxedColumn Vector a c) -> case Vector a c Vector a -> Int -> Maybe a forall a. Unbox a => Vector a -> Int -> Maybe a VU.!? Int i of Just a e -> a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue a e Maybe a Nothing -> Text -> RowValue throwError Text name Just (GroupedBoxedColumn Vector (Vector a) c) -> case Vector (Vector a) c Vector (Vector a) -> Int -> Maybe (Vector a) forall a. Vector a -> Int -> Maybe a V.!? Int i of Just Vector a e -> Vector a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue Vector a e Maybe (Vector a) Nothing -> Text -> RowValue throwError Text name Just (GroupedUnboxedColumn Vector (Vector a) c) -> case Vector (Vector a) c Vector (Vector a) -> Int -> Maybe (Vector a) forall a. Vector a -> Int -> Maybe a V.!? Int i of Just Vector a e -> Vector a -> RowValue forall a. Columnable' a => a -> RowValue toRowValue Vector a e Maybe (Vector a) Nothing -> Text -> RowValue throwError Text name sortedIndexes' :: Bool -> V.Vector Row -> VU.Vector Int sortedIndexes' :: Bool -> Vector Row -> Vector Int sortedIndexes' Bool asc Vector Row rows = (forall s. ST s (Vector Int)) -> Vector Int forall a. (forall s. ST s a) -> a runST ((forall s. ST s (Vector Int)) -> Vector Int) -> (forall s. ST s (Vector Int)) -> Vector Int forall a b. (a -> b) -> a -> b $ do MVector s (Int, Row) withIndexes <- Vector (Int, Row) -> ST s (Mutable Vector (PrimState (ST s)) (Int, Row)) forall (m :: * -> *) (v :: * -> *) a. (PrimMonad m, Vector v a) => v a -> m (Mutable v (PrimState m) a) VG.thaw (Vector Row -> Vector (Int, Row) forall a. Vector a -> Vector (Int, a) V.indexed Vector Row rows) Comparison (Int, Row) -> MVector (PrimState (ST s)) (Int, Row) -> ST s () forall (m :: * -> *) (v :: * -> * -> *) e. (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () VA.sortBy ((if Bool asc then Row -> Row -> Ordering forall a. Ord a => a -> a -> Ordering compare else (Row -> Row -> Ordering) -> Row -> Row -> Ordering forall a b c. (a -> b -> c) -> b -> a -> c flip Row -> Row -> Ordering forall a. Ord a => a -> a -> Ordering compare) (Row -> Row -> Ordering) -> ((Int, Row) -> Row) -> Comparison (Int, Row) forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c `on` (Int, Row) -> Row forall a b. (a, b) -> b snd) MVector s (Int, Row) MVector (PrimState (ST s)) (Int, Row) withIndexes Vector (Int, Row) sorted <- Mutable Vector (PrimState (ST s)) (Int, Row) -> ST s (Vector (Int, Row)) forall (m :: * -> *) (v :: * -> *) a. (PrimMonad m, Vector v a) => Mutable v (PrimState m) a -> m (v a) VG.unsafeFreeze Mutable Vector (PrimState (ST s)) (Int, Row) MVector s (Int, Row) withIndexes Vector Int -> ST s (Vector Int) forall a. a -> ST s a forall (m :: * -> *) a. Monad m => a -> m a return (Vector Int -> ST s (Vector Int)) -> Vector Int -> ST s (Vector Int) forall a b. (a -> b) -> a -> b $ Int -> (Int -> Int) -> Vector Int forall a. Unbox a => Int -> (Int -> a) -> Vector a VU.generate (Vector Row -> Int forall (v :: * -> *) a. Vector v a => v a -> Int VG.length Vector Row rows) (\Int i -> (Int, Row) -> Int forall a b. (a, b) -> a fst (Vector (Int, Row) sorted Vector (Int, Row) -> Int -> (Int, Row) forall (v :: * -> *) a. (HasCallStack, Vector v a) => v a -> Int -> a VG.! Int i))