{-# LANGUAGE ScopedTypeVariables #-}

module Scrappy.Files where

import Scrappy.Types
import Scrappy.Scrape
import Text.Parsec
import Scrappy.Elem
import qualified Data.Map.Strict as Map
import Control.Exception
import Control.Monad
import Data.Map.Strict (Map,keys, toList)
import Data.List (foldl')
import System.FilePath
import System.Directory


-- | Collect every non-directory entry found beneath @dir@, descending into
-- sub-directories.
--
-- Entries come from 'listDirectory' and are joined onto @dir@.  An entry for
-- which 'doesDirectoryExist' holds is walked recursively; any other entry is
-- passed through 'makeAbsolute' and returned.  Results keep the order in
-- which 'listDirectory' produced the entries.
listFilesRecursive :: FilePath -> IO [FilePath]
listFilesRecursive dir = concat <$> (listDirectory dir >>= mapM visit)
  where
    -- Expand a single directory entry into the files it contributes.
    visit :: FilePath -> IO [FilePath]
    visit entry = do
      let path = dir </> entry
      isDirectory <- doesDirectoryExist path
      if isDirectory
        then listFilesRecursive path
        else (: []) <$> makeAbsolute path


-- | Read the file at @fp@ and report the result of @'exists' p@ on its
-- contents.
--
-- 'readFile' is lazy: the handle stays open until the whole string has been
-- consumed, and read errors are raised only when the offending part of the
-- string is demanded.  The contents are therefore forced in full before
-- matching, so that
--
-- * the handle is released by the time this action returns, regardless of
--   how much input 'exists' inspects, and
-- * any read error is thrown from this action rather than from whoever later
--   forces the returned 'Bool'.
searchFile :: ScraperT a -> FilePath -> IO Bool
searchFile p fp = do
  str <- readFile fp
  -- Walking the whole string drives the lazy read to end-of-file.
  _ <- evaluate (length str)
  pure $ exists p str

-- | Run 'searchFile' on @fp@ with a scraper that matches the string @s@.
--
-- The scraper is @'string' s@, i.e. the caller-supplied text itself.
-- (Previously the literal @"s"@ was used, so the argument was ignored.)
searchStrFile :: String -> FilePath -> IO Bool
searchStrFile s fp = searchFile (string s) fp

-- | Scrape the file at @fp@ with @'buildElemsOpts' strs@ and count how often
-- each distinct scraped string occurs.
--
-- The path is echoed with 'print' before the file is read.  The result is an
-- empty map when 'scrape' returns 'Nothing', and also when any 'IOException'
-- is raised while printing, opening or reading the file (best-effort: such
-- files are skipped rather than aborting the caller's scan).
--
-- NOTE(review): 'buildElemsOpts' presumably matches any one of @strs@, which
-- would make the keys a subset of @strs@ -- confirm against its definition.
searchManyFile :: [String] -> FilePath -> IO (Map String Int)
searchManyFile strs fp = handle onIOError $ do
  print fp
  file <- readFile fp
  -- 'readFile' is lazy, so read errors would otherwise be raised whenever the
  -- contents happen to be demanded -- possibly after this handler has been
  -- left.  Forcing the whole string here keeps them inside the handler and
  -- lets the handle be released before the next file is opened.
  _ <- evaluate (length file)
  pure $ maybe mempty countOccurrences (scrape (buildElemsOpts strs) file)
  where
    -- Treat an unreadable file as containing no matches.
    onIOError :: IOException -> IO (Map String Int)
    onIOError _ = pure mempty

-- | Tally how many times each distinct string appears in the given list.
--
-- The resulting map has one key per distinct element, mapped to its number
-- of occurrences; an empty list yields an empty map.
countOccurrences :: [String] -> Map String Int
countOccurrences ws = Map.fromListWith (+) [(w, 1) | w <- ws]



-- | Print which files under @sourceDir@ are referenced from files under
-- @usageDir@.
--
-- Source files are identified by base name only ('takeFileName'), so two
-- source files with the same name in different directories are treated as
-- one.  Every file under @usageDir@ is scanned with 'searchManyFile', and
-- three things are printed with 'print':
--
-- 1. the map from file name to its total count over all scanned files,
-- 2. the separator string @"---"@,
-- 3. the source file names that do not appear in that map.
areFilesUsed :: FilePath -> FilePath -> IO ()
areFilesUsed sourceDir usageDir = do
  sources <- listFilesRecursive sourceDir
  searchFiles <- listFilesRecursive usageDir
  let sources' = takeFileName <$> sources
  maps <- mapM (searchManyFile sources') searchFiles
  -- The 'Monoid' instance of 'Map' is a left-biased union, which would keep
  -- only the count from the first file mentioning a name; sum them instead.
  let mapped = Map.unionsWith (+) maps
  print mapped

  print "---"

  print $ filter (`Map.notMember` mapped) sources'

-- | Report which files under @sourceDir@ are referenced from files under any
-- of @usageDirs@, and return the ones that are not.
--
-- Source files are identified by base name only ('takeFileName').  Every
-- file found under the usage directories is scanned with 'searchManyFile'.
-- Output, in order:
--
-- 1. @Number of files: @ followed by the number of files scanned,
-- 2. one @(name, count)@ pair per line, where the count is the total over
--    all scanned files,
-- 3. the separator string @"---"@ (via 'print'),
-- 4. one line per source file name that does not appear among those pairs.
--
-- The list printed in step 4 is also the return value.
areFilesUsed' :: FilePath -> [FilePath] -> IO [String]
areFilesUsed' sourceDir usageDirs = do
  sources <- listFilesRecursive sourceDir
  searchFiles <- concat <$> mapM listFilesRecursive usageDirs
  putStr "Number of files: "
  print $ length searchFiles
  let sources' = takeFileName <$> sources
  maps <- mapM (searchManyFile sources') searchFiles
  -- The 'Monoid' instance of 'Map' is a left-biased union, which would keep
  -- only the count from the first file mentioning a name; sum them instead.
  let mapped = Map.unionsWith (+) maps
  mapM_ print $ Map.toList mapped

  print "---"

  let unused = filter (`Map.notMember` mapped) sources'
  mapM_ print unused
  pure unused