{-# LANGUAGE DataKinds #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
{-# LANGUAGE TypeApplications #-}
module DataFrame.Typed.TH (
deriveSchema,
deriveSchemaFromCsvFile,
TypedDataFrame,
Column,
) where
import Control.Monad.IO.Class
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import Language.Haskell.TH
import qualified DataFrame.IO.CSV as D
import qualified DataFrame.Internal.Column as C
import qualified DataFrame.Internal.DataFrame as D
import DataFrame.Typed.Types (Column, TypedDataFrame)
deriveSchema :: String -> D.DataFrame -> DecsQ
deriveSchema :: String -> DataFrame -> DecsQ
deriveSchema String
typeName DataFrame
df = do
let cols :: [(Text, String)]
cols = DataFrame -> [(Text, String)]
getSchemaInfo DataFrame
df
let names :: [Text]
names = ((Text, String) -> Text) -> [(Text, String)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, String) -> Text
forall a b. (a, b) -> a
fst [(Text, String)]
cols
case [Text] -> Maybe Text
forall a. Eq a => [a] -> Maybe a
findDuplicate [Text]
names of
Just Text
dup -> String -> Q ()
forall a. String -> Q a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Q ()) -> String -> Q ()
forall a b. (a -> b) -> a -> b
$ String
"Duplicate column name in DataFrame: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
dup
Maybe Text
Nothing -> () -> Q ()
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
[Type]
colTypes <- ((Text, String) -> Q Type) -> [(Text, String)] -> Q [Type]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (Text, String) -> Q Type
mkColumnType [(Text, String)]
cols
let schemaType :: Type
schemaType = (Type -> Type -> Type) -> Type -> [Type] -> Type
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Type
t Type
acc -> Type
PromotedConsT Type -> Type -> Type
`AppT` Type
t Type -> Type -> Type
`AppT` Type
acc) Type
PromotedNilT [Type]
colTypes
let synName :: Name
synName = String -> Name
mkName String
typeName
[Dec] -> DecsQ
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Name -> [TyVarBndr BndrVis] -> Type -> Dec
TySynD Name
synName [] Type
schemaType]
deriveSchemaFromCsvFile :: String -> String -> DecsQ
deriveSchemaFromCsvFile :: String -> String -> DecsQ
deriveSchemaFromCsvFile String
typeName String
path = do
DataFrame
df <- IO DataFrame -> Q DataFrame
forall a. IO a -> Q a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (String -> IO DataFrame
D.readCsv String
path)
String -> DataFrame -> DecsQ
deriveSchema String
typeName DataFrame
df
getSchemaInfo :: D.DataFrame -> [(T.Text, String)]
getSchemaInfo :: DataFrame -> [(Text, String)]
getSchemaInfo DataFrame
df =
let orderedNames :: [Text]
orderedNames =
((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Int) -> Text
forall a b. (a, b) -> a
fst ([(Text, Int)] -> [Text]) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> a -> b
$
((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (\(Text
_, Int
a) (Text
_, Int
b) -> Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Int
a Int
b) ([(Text, Int)] -> [(Text, Int)]) -> [(Text, Int)] -> [(Text, Int)]
forall a b. (a -> b) -> a -> b
$
Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
D.columnIndices DataFrame
df)
in (Text -> (Text, String)) -> [Text] -> [(Text, String)]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
name -> (Text
name, Text -> DataFrame -> String
getColumnTypeStr Text
name DataFrame
df)) [Text]
orderedNames
getColumnTypeStr :: T.Text -> D.DataFrame -> String
getColumnTypeStr :: Text -> DataFrame -> String
getColumnTypeStr Text
name DataFrame
df = case Text -> DataFrame -> Maybe Column
D.getColumn Text
name DataFrame
df of
Just Column
col -> Column -> String
C.columnTypeString Column
col
Maybe Column
Nothing -> String -> String
forall a. HasCallStack => String -> a
error (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
"Column not found: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack Text
name
mkColumnType :: (T.Text, String) -> Q Type
mkColumnType :: (Text, String) -> Q Type
mkColumnType (Text
name, String
tyStr) = do
Type
ty <- String -> Q Type
parseTypeString String
tyStr
let nameLit :: Type
nameLit = TyLit -> Type
LitT (String -> TyLit
StrTyLit (Text -> String
T.unpack Text
name))
Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Column Type -> Type -> Type
`AppT` Type
nameLit Type -> Type -> Type
`AppT` Type
ty
parseTypeString :: String -> Q Type
parseTypeString :: String -> Q Type
parseTypeString String
"Int" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Int
parseTypeString String
"Double" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Double
parseTypeString String
"Float" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Float
parseTypeString String
"Bool" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Bool
parseTypeString String
"Char" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Char
parseTypeString String
"String" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''String
parseTypeString String
"Text" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''T.Text
parseTypeString String
"Integer" = Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Integer
parseTypeString String
s
| String
"Maybe " String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`L.isPrefixOf` String
s = do
Type
inner <- String -> Q Type
parseTypeString (Int -> String -> String
forall a. Int -> [a] -> [a]
L.drop Int
6 String
s)
Type -> Q Type
forall a. a -> Q a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Type -> Q Type) -> Type -> Q Type
forall a b. (a -> b) -> a -> b
$ Name -> Type
ConT ''Maybe Type -> Type -> Type
`AppT` Type
inner
parseTypeString String
s = String -> Q Type
forall a. String -> Q a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Q Type) -> String -> Q Type
forall a b. (a -> b) -> a -> b
$ String
"Unsupported column type in schema inference: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s
findDuplicate :: (Eq a) => [a] -> Maybe a
findDuplicate :: forall a. Eq a => [a] -> Maybe a
findDuplicate [] = Maybe a
forall a. Maybe a
Nothing
findDuplicate (a
x : [a]
xs)
| a
x a -> [a] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [a]
xs = a -> Maybe a
forall a. a -> Maybe a
Just a
x
| Bool
otherwise = [a] -> Maybe a
forall a. Eq a => [a] -> Maybe a
findDuplicate [a]
xs