{-# LANGUAGE OverloadedStrings #-}
module Langchain.DocumentLoader.FileLoader
( FileLoader (..)
) where
import Data.Aeson
import Data.Map (fromList)
import Data.Text (Text, pack)
import qualified Data.Text.IO as TIO
import Langchain.DocumentLoader.Core
import Langchain.TextSplitter.Character
import System.Directory (doesFileExist)
-- | A document loader that reads one file from the local filesystem.
--
-- The single field is the path of the file to read. Declared as a @newtype@
-- because it wraps exactly one value; the constructor name and its argument
-- are the same as before, so construction and pattern matching are unchanged.
newtype FileLoader = FileLoader FilePath
-- | Load a file from disk, either as a single document or as text chunks.
--
-- Both methods share 'readExistingFile', so they agree on the error reported
-- for a missing file and on how the contents are read.
instance BaseLoader FileLoader where
  -- load :: FileLoader -> IO (Either String [Document])
  --
  -- Produces exactly one 'Document' holding the whole file. Its metadata map
  -- has a single entry, "source", whose value is the path as a JSON string.
  -- Yields Left "File not found: <path>" when the file does not exist.
  load (FileLoader path) = fmap toDocuments <$> readExistingFile path
    where
      toDocuments content =
        let meta = fromList [("source", String (pack path))]
         in [Document content meta]

  -- loadAndSplit :: FileLoader -> IO (Either String [Text])
  --
  -- Reads the whole file and splits it with 'defaultCharacterSplitterOps'.
  -- Yields Left "File not found: <path>" when the file does not exist.
  loadAndSplit (FileLoader path) =
    fmap (splitText defaultCharacterSplitterOps) <$> readExistingFile path

-- | Read the entire contents of the file at the given path.
--
-- Returns @Left "File not found: <path>"@ when 'doesFileExist' reports
-- @False@ for the path, and @Right contents@ otherwise.
--
-- The read is strict ('TIO.readFile'), so the file has been fully consumed by
-- the time this action returns; no read is deferred to whoever later forces
-- the text. Exceptions raised by the read itself are not caught here and
-- propagate to the caller.
readExistingFile :: FilePath -> IO (Either String Text)
readExistingFile path = do
  exists <- doesFileExist path
  if exists
    then Right <$> TIO.readFile path
    else pure (Left ("File not found: " ++ path))