{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Module      : Clod.FileSystem.Detection
-- Description : File detection operations for Clod
-- Copyright   : (c) Fuzz Leonard, 2025
-- License     : MIT
-- Maintainer  : cyborg@bionicfuzz.com
-- Stability   : experimental
--
-- This module provides functions for detecting file types and states
-- using libmagic for robust file type detection. It determines whether
-- files are text or binary based on their content rather than just extensions.

module Clod.FileSystem.Detection
  ( -- * File type detection
    isTextFile
  , isModifiedSince
  , safeFileExists
  , safeIsTextFile
  , isTextDescription
  , needsTransformation
  ) where

import Control.Exception (try, SomeException)
import Control.Monad.Except (throwError)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Reader (asks)
import Data.List (isPrefixOf)
import Data.Time.Clock (UTCTime)
import System.Directory (doesFileExist, getModificationTime, canonicalizePath)
import System.FilePath ((</>), takeFileName, takeExtension)
import qualified Data.ByteString.Char8 as BS
import qualified Data.Text as T
import qualified Dhall
import Data.FileEmbed (embedStringFile)

import Clod.Types (ClodM, FileReadCap(..), ClodError(..), isPathAllowed, ClodConfig(..))
import qualified Magic.Init as Magic
import qualified Magic.Operations as Magic

-- | Type to represent text patterns from Dhall
newtype TextPatterns = TextPatterns
  { TextPatterns -> [Text]
textPatterns :: [T.Text]
  } deriving (Int -> TextPatterns -> ShowS
[TextPatterns] -> ShowS
TextPatterns -> [Char]
(Int -> TextPatterns -> ShowS)
-> (TextPatterns -> [Char])
-> ([TextPatterns] -> ShowS)
-> Show TextPatterns
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> TextPatterns -> ShowS
showsPrec :: Int -> TextPatterns -> ShowS
$cshow :: TextPatterns -> [Char]
show :: TextPatterns -> [Char]
$cshowList :: [TextPatterns] -> ShowS
showList :: [TextPatterns] -> ShowS
Show, TextPatterns -> TextPatterns -> Bool
(TextPatterns -> TextPatterns -> Bool)
-> (TextPatterns -> TextPatterns -> Bool) -> Eq TextPatterns
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: TextPatterns -> TextPatterns -> Bool
== :: TextPatterns -> TextPatterns -> Bool
$c/= :: TextPatterns -> TextPatterns -> Bool
/= :: TextPatterns -> TextPatterns -> Bool
Eq)

instance Dhall.FromDhall TextPatterns where
  autoWith :: InputNormalizer -> Decoder TextPatterns
autoWith InputNormalizer
_ = RecordDecoder TextPatterns -> Decoder TextPatterns
forall a. RecordDecoder a -> Decoder a
Dhall.record (RecordDecoder TextPatterns -> Decoder TextPatterns)
-> RecordDecoder TextPatterns -> Decoder TextPatterns
forall a b. (a -> b) -> a -> b
$
    [Text] -> TextPatterns
TextPatterns ([Text] -> TextPatterns)
-> RecordDecoder [Text] -> RecordDecoder TextPatterns
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Decoder [Text] -> RecordDecoder [Text]
forall a. Text -> Decoder a -> RecordDecoder a
Dhall.field Text
"textPatterns" Decoder [Text]
forall a. FromDhall a => Decoder a
Dhall.auto

-- | Default text patterns content embedded at compile time
defaultTextPatternsContent :: String
defaultTextPatternsContent :: [Char]
defaultTextPatternsContent = ByteString -> [Char]
BS.unpack $(embedStringFile "resources/text_patterns.dhall")

-- | Parse the embedded text patterns content
parseDefaultTextPatterns :: ClodM TextPatterns
parseDefaultTextPatterns :: ClodM TextPatterns
parseDefaultTextPatterns = do
  result <- IO (Either SomeException TextPatterns)
-> ReaderT
     ClodConfig
     (ExceptT ClodError IO)
     (Either SomeException TextPatterns)
forall a. IO a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either SomeException TextPatterns)
 -> ReaderT
      ClodConfig
      (ExceptT ClodError IO)
      (Either SomeException TextPatterns))
-> IO (Either SomeException TextPatterns)
-> ReaderT
     ClodConfig
     (ExceptT ClodError IO)
     (Either SomeException TextPatterns)
forall a b. (a -> b) -> a -> b
$ IO TextPatterns -> IO (Either SomeException TextPatterns)
forall e a. Exception e => IO a -> IO (Either e a)
try (IO TextPatterns -> IO (Either SomeException TextPatterns))
-> IO TextPatterns -> IO (Either SomeException TextPatterns)
forall a b. (a -> b) -> a -> b
$ Decoder TextPatterns -> Text -> IO TextPatterns
forall a. Decoder a -> Text -> IO a
Dhall.input Decoder TextPatterns
forall a. FromDhall a => Decoder a
Dhall.auto ([Char] -> Text
T.pack [Char]
defaultTextPatternsContent)
  case result of
    Right TextPatterns
patterns -> TextPatterns -> ClodM TextPatterns
forall a. a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. Monad m => a -> m a
return TextPatterns
patterns
    Left (SomeException
e :: SomeException) -> 
      ClodError -> ClodM TextPatterns
forall a. ClodError -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (ClodError -> ClodM TextPatterns)
-> ClodError -> ClodM TextPatterns
forall a b. (a -> b) -> a -> b
$ [Char] -> ClodError
ConfigError ([Char] -> ClodError) -> [Char] -> ClodError
forall a b. (a -> b) -> a -> b
$ [Char]
"Failed to parse default text patterns: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ SomeException -> [Char]
forall a. Show a => a -> [Char]
show SomeException
e

-- | Load text patterns for determining text files
-- This first checks for a custom pattern file in the clod directory,
-- and falls back to the embedded default patterns if not found.
loadTextPatterns :: ClodM TextPatterns
loadTextPatterns :: ClodM TextPatterns
loadTextPatterns = do
  clodDir <- (ClodConfig -> [Char])
-> ReaderT ClodConfig (ExceptT ClodError IO) [Char]
forall r (m :: * -> *) a. MonadReader r m => (r -> a) -> m a
asks ClodConfig -> [Char]
configDir
  
  -- First check if there's a custom pattern file in the clod directory
  let customPath = [Char]
clodDir [Char] -> ShowS
</> [Char]
"resources" [Char] -> ShowS
</> [Char]
"text_patterns.dhall"
  customExists <- liftIO $ doesFileExist customPath
  
  if customExists
    then do
      result <- liftIO $ try $ Dhall.inputFile Dhall.auto customPath
      case result of
        Right TextPatterns
patterns -> TextPatterns -> ClodM TextPatterns
forall a. a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. Monad m => a -> m a
return TextPatterns
patterns
        Left (SomeException
e :: SomeException) -> 
          ClodError -> ClodM TextPatterns
forall a. ClodError -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (ClodError -> ClodM TextPatterns)
-> ClodError -> ClodM TextPatterns
forall a b. (a -> b) -> a -> b
$ [Char] -> ClodError
ConfigError ([Char] -> ClodError) -> [Char] -> ClodError
forall a b. (a -> b) -> a -> b
$ [Char]
"Failed to load custom text patterns: " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ SomeException -> [Char]
forall a. Show a => a -> [Char]
show SomeException
e
    else parseDefaultTextPatterns

-- | Check if a file is a text file using libmagic with enhanced detection
isTextFile :: FilePath -> ClodM Bool
isTextFile :: [Char] -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
isTextFile [Char]
file = do
  exists <- IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a. IO a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool)
-> IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> IO Bool
doesFileExist [Char]
file
  if not exists
    then return False
    else do
      -- Detect the file type using libmagic
      result <- liftIO $ try $ do
        magic <- Magic.magicOpen []
        Magic.magicLoadDefault magic
        Magic.magicFile magic file
      
      case result of
        Left (SomeException
_ :: SomeException) -> Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a. a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False
        Right [Char]
description -> [Char] -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
isTextDescription [Char]
description

-- | Helper to determine if a file description indicates text content
isTextDescription :: String -> ClodM Bool
isTextDescription :: [Char] -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
isTextDescription [Char]
desc = do
  patterns <- ClodM TextPatterns
loadTextPatterns
  let lowerDesc = Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ [Char] -> Text
T.pack [Char]
desc
  return $ any (\Text
pattern -> Text
pattern Text -> Text -> Bool
`T.isInfixOf` Text
lowerDesc) (textPatterns patterns)

-- | Special handling for files that need transformation
--
-- Detects files that need special handling and transformation based on their
-- name or extension. Currently identifies:
--
-- * Hidden files (dotfiles) - need to be transformed to be visible
-- * SVG files - need to be transformed to XML for Claude compatibility
--
-- >>> needsTransformation ".gitignore"
-- True
--
-- >>> needsTransformation "logo.svg"
-- True
--
-- >>> needsTransformation "regular-file.txt"
-- False
needsTransformation :: FilePath -> Bool
needsTransformation :: [Char] -> Bool
needsTransformation [Char]
path =
  -- Handle dotfiles and SVG files with special transformation
  ([Char]
"." [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` ShowS
takeFileName [Char]
path) Bool -> Bool -> Bool
|| 
  (ShowS
takeExtension [Char]
path [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
".svg")

-- | Check if a file has been modified since the given time
isModifiedSince :: FilePath -> UTCTime -> FilePath -> ClodM Bool
isModifiedSince :: [Char]
-> UTCTime
-> [Char]
-> ReaderT ClodConfig (ExceptT ClodError IO) Bool
isModifiedSince [Char]
basePath UTCTime
lastRunTime [Char]
relPath = do
  let fullPath :: [Char]
fullPath = [Char]
basePath [Char] -> ShowS
</> [Char]
relPath
  fileExists <- IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a. IO a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool)
-> IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a b. (a -> b) -> a -> b
$ [Char] -> IO Bool
doesFileExist [Char]
fullPath
  if not fileExists
    then return False
    else do
      modTime <- liftIO $ getModificationTime fullPath
      return (modTime > lastRunTime)

-- | Safe file existence check that checks capabilities
safeFileExists :: FileReadCap -> FilePath -> ClodM Bool
safeFileExists :: FileReadCap
-> [Char] -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
safeFileExists FileReadCap
cap [Char]
path = do
  allowed <- IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a. IO a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool)
-> IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a b. (a -> b) -> a -> b
$ [[Char]] -> [Char] -> IO Bool
isPathAllowed (FileReadCap -> [[Char]]
allowedReadDirs FileReadCap
cap) [Char]
path
  if allowed
    then liftIO $ doesFileExist path
    else do
      canonicalPath <- liftIO $ canonicalizePath path
      throwError $ CapabilityError $ "Access denied: Cannot check existence of file outside allowed directories: " ++ canonicalPath

-- | Safe file type check that checks capabilities
safeIsTextFile :: FileReadCap -> FilePath -> ClodM Bool
safeIsTextFile :: FileReadCap
-> [Char] -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
safeIsTextFile FileReadCap
cap [Char]
path = do
  allowed <- IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a. IO a -> ReaderT ClodConfig (ExceptT ClodError IO) a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool)
-> IO Bool -> ReaderT ClodConfig (ExceptT ClodError IO) Bool
forall a b. (a -> b) -> a -> b
$ [[Char]] -> [Char] -> IO Bool
isPathAllowed (FileReadCap -> [[Char]]
allowedReadDirs FileReadCap
cap) [Char]
path
  if allowed
    then isTextFile path
    else do
      canonicalPath <- liftIO $ canonicalizePath path
      throwError $ CapabilityError $ "Access denied: Cannot check file type outside allowed directories: " ++ canonicalPath