{-# LANGUAGE OverloadedStrings #-}

{- |
Module      : Langchain.DocumentLoader.FileLoader
Description : File loading implementation for LangChain Haskell
Copyright   : (c) 2025 Tushar Adhatrao
License     : MIT
Maintainer  : Tushar Adhatrao <tusharadhatrao@gmail.com>
Stability   : experimental

File-based document loader implementation following LangChain's document loading patterns
Integrates with the core document splitting functionality for processing text files.

Example usage:

@
-- Load a document from file
loader = FileLoader "data.txt"
docs <- load loader
-- Right [Document {pageContent = "File content", metadata = ...}]

-- Load and split document content
chunks <- loadAndSplit loader
-- Right ["First paragraph", "Second paragraph", ...]
@
-}
module Langchain.DocumentLoader.FileLoader
  ( FileLoader (..)
  ) where

import Data.Aeson
import Data.Map (fromList)
import Data.Text (pack)
import Langchain.DocumentLoader.Core
import Langchain.TextSplitter.Character
import System.Directory (doesFileExist)

{- | File loader configuration
Specifies the file path to load documents from.

Example:

>>> FileLoader "docs/example.txt"
FileLoader "docs/example.txt"
-}
data FileLoader = FileLoader FilePath

instance BaseLoader FileLoader where
  -- \| Load document with file source metadata
  --
  --  Example:
  
  --  >>> load (FileLoader "test.txt")
  --  Right [Document {pageContent = "Test content", metadata = fromList [("source", "test.txt")]}]
  --
  load :: FileLoader -> IO (Either String [Document])
load (FileLoader String
path) = do
    Bool
exists <- String -> IO Bool
doesFileExist String
path
    if Bool
exists
      then do
        String
content <- String -> IO String
readFile String
path
        let meta :: Map Text Value
meta = [(Text, Value)] -> Map Text Value
forall k a. Ord k => [(k, a)] -> Map k a
fromList [(Text
"source", Text -> Value
String (Text -> Value) -> Text -> Value
forall a b. (a -> b) -> a -> b
$ String -> Text
pack String
path)]
        Either String [Document] -> IO (Either String [Document])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Document] -> IO (Either String [Document]))
-> Either String [Document] -> IO (Either String [Document])
forall a b. (a -> b) -> a -> b
$ [Document] -> Either String [Document]
forall a b. b -> Either a b
Right [Text -> Map Text Value -> Document
Document (String -> Text
pack String
content) Map Text Value
meta]
      else
        Either String [Document] -> IO (Either String [Document])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Document] -> IO (Either String [Document]))
-> Either String [Document] -> IO (Either String [Document])
forall a b. (a -> b) -> a -> b
$ String -> Either String [Document]
forall a b. a -> Either a b
Left (String -> Either String [Document])
-> String -> Either String [Document]
forall a b. (a -> b) -> a -> b
$ String
"File not found: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
path

  -- \| Load and split content using default character splitter
  --
  --  Example:
  
  --  >>> loadAndSplit (FileLoader "split.txt")
  --  Right ["Paragraph 1", "Paragraph 2", ...]
  --
  loadAndSplit :: FileLoader -> IO (Either String [Text])
loadAndSplit (FileLoader String
path) = do
    Bool
exists <- String -> IO Bool
doesFileExist String
path
    if Bool
exists
      then do
        String
content <- String -> IO String
readFile String
path
        Either String [Text] -> IO (Either String [Text])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Text] -> IO (Either String [Text]))
-> Either String [Text] -> IO (Either String [Text])
forall a b. (a -> b) -> a -> b
$ [Text] -> Either String [Text]
forall a b. b -> Either a b
Right ([Text] -> Either String [Text]) -> [Text] -> Either String [Text]
forall a b. (a -> b) -> a -> b
$ CharacterSplitterOps -> Text -> [Text]
splitText CharacterSplitterOps
defaultCharacterSplitterOps (String -> Text
pack String
content)
      else
        Either String [Text] -> IO (Either String [Text])
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String [Text] -> IO (Either String [Text]))
-> Either String [Text] -> IO (Either String [Text])
forall a b. (a -> b) -> a -> b
$ String -> Either String [Text]
forall a b. a -> Either a b
Left (String -> Either String [Text]) -> String -> Either String [Text]
forall a b. (a -> b) -> a -> b
$ String
"File not found: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
path

{- $examples
Test case patterns:
1. Successful load with metadata
   >>> withTestFile "Content" $ \path -> load (FileLoader path)
   Right [Document {pageContent = "Content", metadata = ...}]

2. Error handling for missing files
   >>> load (FileLoader "missing.txt")
   Left "File not found: missing.txt"

3. Content splitting with default parameters
   >>> withTestFile "A\n\nB\n\nC" $ \path -> loadAndSplit (FileLoader path)
   Right ["A", "B", "C"]
-}