{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Typed.Freeze (
    -- * Safe boundary
    freeze,
    freezeWithError,

    -- * Escape hatches
    thaw,
    unsafeFreeze,
) where

import qualified Data.Text as T
import Type.Reflection (SomeTypeRep)

import qualified DataFrame.Internal.Column as C
import qualified DataFrame.Internal.DataFrame as D
import DataFrame.Operations.Core (columnNames)
import DataFrame.Typed.Schema (KnownSchema (..))
import DataFrame.Typed.Types (TypedDataFrame (..))

{- | Validate that an untyped 'D.DataFrame' matches the expected schema @cols@,
then wrap it as a 'TypedDataFrame'.

Returns 'Nothing' when validation fails; the validation message is discarded.
Use 'freezeWithError' to obtain that message.
-}
freeze ::
    forall cols. (KnownSchema cols) => D.DataFrame -> Maybe (TypedDataFrame cols)
freeze df =
    either (const Nothing) (const (Just (TDF df))) (validateSchema @cols df)

{- | Like 'freeze' but returns a descriptive error message on failure.

On success the input frame is wrapped unchanged; on failure the 'Left' value
produced by schema validation is passed through as-is.
-}
freezeWithError ::
    forall cols.
    (KnownSchema cols) =>
    D.DataFrame -> Either T.Text (TypedDataFrame cols)
freezeWithError df = TDF df <$ validateSchema @cols df

{- | Unwrap a typed DataFrame back to the untyped representation.
Always safe; discards type information.
-}
thaw :: TypedDataFrame cols -> D.DataFrame
thaw (TDF df) = df

{- | Wrap an untyped DataFrame without any validation.
Used internally after delegation where the library guarantees schema correctness.

No check is performed here, so the caller is responsible for @cols@ actually
describing the frame; prefer 'freeze' or 'freezeWithError' at trust boundaries.
-}
unsafeFreeze :: D.DataFrame -> TypedDataFrame cols
unsafeFreeze = TDF

{- | Check an untyped frame against the schema @cols@.

Each @(name, type)@ pair from 'schemaEvidence' is looked up in the frame, in
order; the first failing column short-circuits with a 'Left' message:

* the column is absent, or
* 'matchesType' rejects the column for the expected type.

Only the columns named by the schema are inspected, so additional columns in
the frame do not cause a failure.
-}
validateSchema ::
    forall cols.
    (KnownSchema cols) =>
    D.DataFrame -> Either T.Text ()
validateSchema df = mapM_ checkCol (schemaEvidence @cols)
  where
    -- Validate one schema entry against the frame.
    checkCol :: (T.Text, SomeTypeRep) -> Either T.Text ()
    checkCol (name, expectedRep) =
        case D.getColumn name df of
            Nothing -> Left (missingColumn name)
            Just col
                | matchesType expectedRep col -> Right ()
                | otherwise -> Left (typeMismatch name expectedRep col)

    -- Message for a schema column that is not present in the frame; lists the
    -- frame's column names to make typos easy to spot.
    missingColumn :: T.Text -> T.Text
    missingColumn name =
        "Column '"
            <> name
            <> "' not found in DataFrame. "
            <> "Available columns: "
            <> T.pack (show (columnNames df))

    -- Message for a column that exists but whose type does not match.
    typeMismatch :: T.Text -> SomeTypeRep -> C.Column -> T.Text
    typeMismatch name expectedRep col =
        "Type mismatch on column '"
            <> name
            <> "': expected "
            <> T.pack (show expectedRep)
            <> ", got "
            <> T.pack (C.columnTypeString col)

{- | Check if a Column's element type matches the expected 'SomeTypeRep'.

The comparison is textual: the 'show' rendering of the expected type is
compared with 'C.columnTypeString' of the column.
-}
matchesType :: SomeTypeRep -> C.Column -> Bool
-- NOTE(review): this relies on 'C.columnTypeString' rendering types the same
-- way as the 'Show' instance of 'SomeTypeRep' -- confirm against its definition.
matchesType expected col = show expected == C.columnTypeString col