{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module DataFrame.Operations.Typing where

import qualified Data.Text as T
import qualified Data.Vector as V

import Data.Maybe (fromMaybe)
import qualified Data.Proxy as P
import Data.Time
import Data.Type.Equality (TestEquality (..), type (:~:) (Refl))
import DataFrame.Internal.Column (Column (..), fromVector)
import DataFrame.Internal.DataFrame (DataFrame (..))
import DataFrame.Internal.Parsing
import DataFrame.Internal.Schema
import Text.Read
import Type.Reflection (typeRep)

-- | A date format string. In this module it is only ever passed as the
-- format argument of 'parseTimeM' / 'parseTimeOrError' from "Data.Time".
type DateFormat = String

-- | Re-infer the element type of every column of a 'DataFrame'.
--
-- Each column is passed through 'parseDefault' with the same settings:
--
-- * @missing@    - extra texts treated as null when @safeRead@ is 'True'
-- * @n@          - how many leading values are sampled to guess the type
-- * @safeRead@   - selects the null-detection strategy (see 'parseFromExamples')
-- * @dateFormat@ - format string used when trying to read dates
parseDefaults :: [T.Text] -> Int -> Bool -> DateFormat -> DataFrame -> DataFrame
parseDefaults missing n safeRead dateFormat df =
    df{columns = V.map reparse (columns df)}
  where
    reparse = parseDefault missing n safeRead dateFormat

-- | Re-infer the element type of a single column.
--
-- Only textual columns are re-parsed:
--
-- * a 'BoxedColumn' of 'T.Text' is handed to 'parseFromExamples' directly;
-- * a 'BoxedColumn' of 'String' is packed to 'T.Text' first;
-- * an 'OptionalColumn' of 'T.Text' or 'String' has its missing entries
--   replaced by the empty string and is then handled the same way.
--
-- Every other column is returned unchanged. In particular an
-- 'OptionalColumn' of a non-textual type stays an 'OptionalColumn' (it is
-- not re-wrapped in 'BoxedColumn').
parseDefault :: [T.Text] -> Int -> Bool -> DateFormat -> Column -> Column
parseDefault missing n safeRead dateFormat column@(BoxedColumn (c :: V.Vector a)) =
    case testEquality (typeRep @a) (typeRep @T.Text) of
        Just Refl -> parseFromExamples missing n safeRead dateFormat c
        Nothing -> case testEquality (typeRep @a) (typeRep @String) of
            Just Refl ->
                parseFromExamples missing n safeRead dateFormat (V.map T.pack c)
            -- Not textual: nothing to infer, keep the column as it is.
            Nothing -> column
parseDefault missing n safeRead dateFormat column@(OptionalColumn (c :: V.Vector (Maybe a))) =
    case testEquality (typeRep @a) (typeRep @T.Text) of
        Just Refl ->
            parseFromExamples
                missing
                n
                safeRead
                dateFormat
                (V.map (fromMaybe "") c)
        Nothing -> case testEquality (typeRep @a) (typeRep @String) of
            Just Refl ->
                parseFromExamples
                    missing
                    n
                    safeRead
                    dateFormat
                    (V.map (T.pack . fromMaybe "") c)
            -- Not textual: keep the original optional column untouched.
            Nothing -> column
parseDefault _ _ _ _ column = column

-- | Parse a vector of raw texts into a typed 'Column'.
--
-- The first @n@ values are used as examples: 'makeParsingAssumption' guesses
-- a type from them, and the matching @handle*Assumption@ function then
-- converts the /whole/ vector.
--
-- Null detection is done with 'convertNullish' (using @missing@) when
-- @safeRead@ is 'True', and with 'convertOnlyEmpty' otherwise.
parseFromExamples ::
    [T.Text] -> Int -> Bool -> DateFormat -> V.Vector T.Text -> Column
parseFromExamples missing n safeRead dateFormat cols =
    case makeParsingAssumption dateFormat examples of
        BoolAssumption -> handleBoolAssumption asMaybeText
        IntAssumption -> handleIntAssumption asMaybeText
        DoubleAssumption -> handleDoubleAssumption asMaybeText
        TextAssumption -> handleTextAssumption asMaybeText
        DateAssumption -> handleDateAssumption dateFormat asMaybeText
        NoAssumption -> handleNoAssumption dateFormat asMaybeText
  where
    converter :: T.Text -> Maybe T.Text
    converter
        | safeRead = convertNullish missing
        | otherwise = convertOnlyEmpty
    -- Sample used only for guessing the type.
    examples :: V.Vector (Maybe T.Text)
    examples = V.map converter (V.take n cols)
    -- The full column with nulls turned into 'Nothing'.
    asMaybeText :: V.Vector (Maybe T.Text)
    asMaybeText = V.map converter cols

-- | Build a column under the assumption that the values are booleans.
--
-- Every entry is read with 'readBool'. If each non-null text yields a value
-- (checked with 'vecSameConstructor'), the parsed values are used: when
-- 'sequenceA' succeeds (no 'Nothing' at all) the plain vector is passed to
-- 'fromVector', otherwise the vector of 'Maybe's is. If some text fails to
-- parse, the column falls back to 'handleTextAssumption'.
handleBoolAssumption :: V.Vector (Maybe T.Text) -> Column
handleBoolAssumption asMaybeText
    | parsableAsBool =
        maybe (fromVector asMaybeBool) fromVector (sequenceA asMaybeBool)
    | otherwise = handleTextAssumption asMaybeText
  where
    asMaybeBool :: V.Vector (Maybe Bool)
    asMaybeBool = V.map (>>= readBool) asMaybeText
    parsableAsBool :: Bool
    parsableAsBool = vecSameConstructor asMaybeText asMaybeBool

-- | Build a column under the assumption that the values are integers.
--
-- Tried in order:
--
-- 1. every non-null text is accepted by both 'readInt' and 'readDouble':
--    use the 'Int' values;
-- 2. every non-null text is accepted by 'readDouble': use the 'Double'
--    values;
-- 3. otherwise fall back to 'handleTextAssumption'.
--
-- In cases 1 and 2 the plain vector is passed to 'fromVector' when
-- 'sequenceA' finds no 'Nothing', otherwise the vector of 'Maybe's is.
handleIntAssumption :: V.Vector (Maybe T.Text) -> Column
handleIntAssumption asMaybeText
    | parsableAsInt =
        maybe (fromVector asMaybeInt) fromVector (sequenceA asMaybeInt)
    | parsableAsDouble =
        maybe (fromVector asMaybeDouble) fromVector (sequenceA asMaybeDouble)
    | otherwise = handleTextAssumption asMaybeText
  where
    asMaybeInt :: V.Vector (Maybe Int)
    asMaybeInt = V.map (>>= readInt) asMaybeText
    asMaybeDouble :: V.Vector (Maybe Double)
    asMaybeDouble = V.map (>>= readDouble) asMaybeText
    parsableAsInt :: Bool
    parsableAsInt =
        vecSameConstructor asMaybeText asMaybeInt
            && vecSameConstructor asMaybeText asMaybeDouble
    parsableAsDouble :: Bool
    parsableAsDouble = vecSameConstructor asMaybeText asMaybeDouble

-- | Build a column under the assumption that the values are doubles.
--
-- Every entry is read with 'readDouble'. If each non-null text yields a
-- value, the parsed values are used (the plain vector when 'sequenceA' finds
-- no 'Nothing', the vector of 'Maybe's otherwise); if some text fails to
-- parse, the column falls back to 'handleTextAssumption'.
handleDoubleAssumption :: V.Vector (Maybe T.Text) -> Column
handleDoubleAssumption asMaybeText
    | parsableAsDouble =
        maybe (fromVector asMaybeDouble) fromVector (sequenceA asMaybeDouble)
    | otherwise = handleTextAssumption asMaybeText
  where
    asMaybeDouble :: V.Vector (Maybe Double)
    asMaybeDouble = V.map (>>= readDouble) asMaybeText
    parsableAsDouble :: Bool
    parsableAsDouble = vecSameConstructor asMaybeText asMaybeDouble

-- | Build a column under the assumption that the values are dates.
--
-- Every entry is read with 'parseTimeOpt' using @dateFormat@. If each
-- non-null text yields a 'Day', the parsed values are used (the plain vector
-- when 'sequenceA' finds no 'Nothing', the vector of 'Maybe's otherwise); if
-- some text fails to parse, the column falls back to 'handleTextAssumption'.
handleDateAssumption :: DateFormat -> V.Vector (Maybe T.Text) -> Column
handleDateAssumption dateFormat asMaybeText
    | parsableAsDate =
        maybe (fromVector asMaybeDate) fromVector (sequenceA asMaybeDate)
    | otherwise = handleTextAssumption asMaybeText
  where
    asMaybeDate :: V.Vector (Maybe Day)
    asMaybeDate = V.map (>>= parseTimeOpt dateFormat) asMaybeText
    parsableAsDate :: Bool
    parsableAsDate = vecSameConstructor asMaybeText asMaybeDate

-- | Build a text column.
--
-- When no entry is 'Nothing' ('sequenceA' succeeds) the plain 'T.Text'
-- vector is passed to 'fromVector'; otherwise the vector of 'Maybe's is.
handleTextAssumption :: V.Vector (Maybe T.Text) -> Column
handleTextAssumption asMaybeText =
    case sequenceA asMaybeText of
        Just allPresent -> fromVector allPresent
        Nothing -> fromVector asMaybeText

-- | Build a column when the examples gave no hint about its type.
--
-- The whole vector is inspected instead. Candidate types are tried in the
-- order Bool, Int, Double, Date; the first one that accepts every non-null
-- text wins, and text is the fallback. The vector of 'Maybe's is always the
-- one passed to 'fromVector' here.
handleNoAssumption :: DateFormat -> V.Vector (Maybe T.Text) -> Column
handleNoAssumption dateFormat asMaybeText
    -- No need to check for null values. If we are in this condition, that
    -- means that the examples consisted only of null values, so we can
    -- confidently know that this column must be an OptionalColumn
    | V.all (== Nothing) asMaybeText = fromVector asMaybeText
    | parsableAsBool = fromVector asMaybeBool
    | parsableAsInt = fromVector asMaybeInt
    | parsableAsDouble = fromVector asMaybeDouble
    | parsableAsDate = fromVector asMaybeDate
    | otherwise = fromVector asMaybeText
  where
    -- True when parsing lost no value: Just/Nothing positions are unchanged.
    parsesAs :: V.Vector (Maybe b) -> Bool
    parsesAs = vecSameConstructor asMaybeText

    asMaybeBool :: V.Vector (Maybe Bool)
    asMaybeBool = V.map (>>= readBool) asMaybeText
    asMaybeInt :: V.Vector (Maybe Int)
    asMaybeInt = V.map (>>= readInt) asMaybeText
    asMaybeDouble :: V.Vector (Maybe Double)
    asMaybeDouble = V.map (>>= readDouble) asMaybeText
    asMaybeDate :: V.Vector (Maybe Day)
    asMaybeDate = V.map (>>= parseTimeOpt dateFormat) asMaybeText

    parsableAsBool :: Bool
    parsableAsBool = parsesAs asMaybeBool
    parsableAsInt :: Bool
    parsableAsInt = parsesAs asMaybeInt && parsesAs asMaybeDouble
    parsableAsDouble :: Bool
    parsableAsDouble = parsesAs asMaybeDouble
    parsableAsDate :: Bool
    parsableAsDate = parsesAs asMaybeDate

-- | Turn a raw text into 'Nothing' when it counts as null, 'Just' otherwise.
--
-- A value is null when 'isNullish' accepts it or when it is one of the
-- caller-supplied @missing@ markers.
convertNullish :: [T.Text] -> T.Text -> Maybe T.Text
convertNullish missing v
    | isNullish v || v `elem` missing = Nothing
    | otherwise = Just v

-- | Turn the empty text into 'Nothing'; every other text is kept as 'Just'.
convertOnlyEmpty :: T.Text -> Maybe T.Text
convertOnlyEmpty v
    | v == "" = Nothing
    | otherwise = Just v

-- | Try to read a 'Day' from a text using the given format string and
-- 'defaultTimeLocale'. Returns 'Nothing' when the text does not match.
parseTimeOpt :: DateFormat -> T.Text -> Maybe Day
parseTimeOpt dateFormat s =
    parseTimeM acceptSurroundingWhitespace defaultTimeLocale dateFormat (T.unpack s)
  where
    -- First argument of 'parseTimeM': accept leading/trailing whitespace.
    acceptSurroundingWhitespace = True

-- | Read a 'Day' from a text using the given format string and
-- 'defaultTimeLocale'.
--
-- This is partial: it is built on 'parseTimeOrError', so a text that does
-- not match the format raises an error instead of returning a value. Use
-- 'parseTimeOpt' when failure must be handled.
unsafeParseTime :: DateFormat -> T.Text -> Day
unsafeParseTime dateFormat s =
    parseTimeOrError acceptSurroundingWhitespace defaultTimeLocale dateFormat (T.unpack s)
  where
    -- First argument of 'parseTimeOrError': accept leading/trailing whitespace.
    acceptSurroundingWhitespace = True

-- | 'True' when at least one element of the vector is 'Nothing'.
hasNullValues :: (Eq a) => V.Vector (Maybe a) -> Bool
hasNullValues = V.elem Nothing

-- | 'True' when both vectors have the same length and, position by
-- position, are either both 'Just' or both 'Nothing'. The wrapped values
-- themselves are never compared.
vecSameConstructor :: V.Vector (Maybe a) -> V.Vector (Maybe b) -> Bool
vecSameConstructor xs ys =
    V.length xs == V.length ys
        && V.and (V.zipWith hasSameConstructor xs ys)
  where
    hasSameConstructor :: Maybe c -> Maybe d -> Bool
    hasSameConstructor (Just _) (Just _) = True
    hasSameConstructor Nothing Nothing = True
    hasSameConstructor _ _ = False

-- | Guess how a column should be parsed from a sample of its values.
--
-- A candidate type "fits" when parsing leaves the positions of 'Just' and
-- 'Nothing' unchanged, i.e. every non-null example was accepted by that
-- type's reader. Candidates are tried in the order Bool, Int, Double, Date,
-- with text as the fallback.
makeParsingAssumption ::
    DateFormat -> V.Vector (Maybe T.Text) -> ParsingAssumption
makeParsingAssumption dateFormat asMaybeText
    -- All the examples are "NA", "Null", "", so we can't make any shortcut
    -- assumptions and just have to go the long way.
    | V.all (== Nothing) asMaybeText = NoAssumption
    -- After accounting for nulls, every example is accepted by 'readBool'.
    | parsesAs asMaybeBool = BoolAssumption
    -- After accounting for nulls, parsing for Ints and Doubles results in the
    -- same corresponding positions of Justs and Nothings, so we assume
    -- that the best way to parse is Int
    | parsesAs asMaybeInt && parsesAs asMaybeDouble = IntAssumption
    -- After accounting for nulls, the previous condition fails, so some (or
    -- none) can be parsed as Ints and some can be parsed as Doubles, so we
    -- make the assumption of doubles.
    | parsesAs asMaybeDouble = DoubleAssumption
    -- After accounting for nulls, parsing for Dates results in the same
    -- corresponding positions of Justs and Nothings, so we assume that the
    -- best way to parse is Date.
    | parsesAs asMaybeDate = DateAssumption
    | otherwise = TextAssumption
  where
    -- True when parsing lost no value: Just/Nothing positions are unchanged.
    parsesAs :: V.Vector (Maybe b) -> Bool
    parsesAs = vecSameConstructor asMaybeText

    asMaybeBool :: V.Vector (Maybe Bool)
    asMaybeBool = V.map (>>= readBool) asMaybeText
    asMaybeInt :: V.Vector (Maybe Int)
    asMaybeInt = V.map (>>= readInt) asMaybeText
    asMaybeDouble :: V.Vector (Maybe Double)
    asMaybeDouble = V.map (>>= readDouble) asMaybeText
    asMaybeDate :: V.Vector (Maybe Day)
    asMaybeDate = V.map (>>= parseTimeOpt dateFormat) asMaybeText

-- | The parsing strategy chosen for a column by 'makeParsingAssumption'
-- from a sample of its values. 'parseFromExamples' dispatches on it to the
-- matching @handle*Assumption@ function.
data ParsingAssumption
    = BoolAssumption -- every non-null example was accepted by 'readBool'
    | IntAssumption -- every non-null example was accepted by 'readInt' and 'readDouble'
    | DoubleAssumption -- every non-null example was accepted by 'readDouble'
    | DateAssumption -- every non-null example matched the date format
    | NoAssumption -- all examples were null, so nothing could be inferred
    | TextAssumption -- fallback when no other candidate fits
-- | Convert the leading columns of a 'DataFrame' to the types of a schema.
--
-- The i-th schema entry is applied to the i-th column. Columns beyond the
-- end of the schema list are left as they are, and schema entries beyond the
-- last column are ignored.
--
-- All conversions are done in a single pass over the columns rather than by
-- rebuilding the column vector once per schema entry.
parseWithTypes :: [SchemaType] -> DataFrame -> DataFrame
parseWithTypes ts df = df{columns = converted V.++ untouched}
  where
    cols :: V.Vector Column
    cols = columns df
    -- 'take' bounds the work by the number of columns, so an over-long (or
    -- even infinite) schema list is handled just like a short one.
    converted :: V.Vector Column
    converted = V.zipWith asType (V.fromList (take (V.length cols) ts)) cols
    untouched :: V.Vector Column
    untouched = V.drop (V.length converted) cols

    -- Convert one column to the requested element type.
    --
    -- Only boxed columns are converted:
    --   * already of the requested type: returned unchanged;
    --   * 'T.Text' elements: each is unpacked and read with 'readMaybe';
    --   * anything else: each element is rendered with 'show' and then read
    --     with 'readMaybe'.
    -- The converted vector holds 'Maybe' values ('Nothing' where the read
    -- failed). Every other kind of column is returned unchanged.
    asType :: SchemaType -> Column -> Column
    asType (SType (_ :: P.Proxy a)) c@(BoxedColumn (col :: V.Vector b)) =
        case testEquality (typeRep @a) (typeRep @b) of
            Just Refl -> c
            Nothing -> case testEquality (typeRep @T.Text) (typeRep @b) of
                Just Refl -> fromVector (V.map (readMaybe @a . T.unpack) col)
                Nothing -> fromVector (V.map (readMaybe @a . show) col)
    asType _ c = c