-- |
-- Module      : Text.Megaparsec.CSV
-- Description : Parsec CSV files with optional escape characters using megaparsec
-- Copyright   : [2025] Noah Martin Williams
-- License     : BSD3
--
-- Maintainer  : Noah Martin Williams <noahmartinwilliams@gmail.com>
-- Stability   : experimental
-- Portability : portable
--
-- This module contains the csv function.
module Text.Megaparsec.CSV(csv, CSVParser) where

import Control.Monad
import Data.Void
import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Char.Lexer
import Text.Megaparsec.Internal


-- | The parsing type for CSV files
-- 
type CSVParser = Parsec Void String

escapedChar :: Char -> CSVParser Char
escapedChar :: Char -> CSVParser Char
escapedChar Char
c = do
    ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void String Identity (Token String)
 -> ParsecT Void String Identity ())
-> ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
c
    Char
c' <- CSVParser Char
ParsecT Void String Identity (Token String)
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle 
    Char -> CSVParser Char
forall a. a -> ParsecT Void String Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c'

removeCR :: String -> String
removeCR :: String -> String
removeCR String
ret = do
    let l :: Int
l = String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
Prelude.length String
ret
    if Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
0
    then
        if (String
ret String -> Int -> Char
forall a. HasCallStack => [a] -> Int -> a
!! (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\r' 
        then
            (Int -> String -> String
forall a. Int -> [a] -> [a]
Prelude.take (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) String
ret)
        else
            String
ret
    else
        String
""

eol' :: CSVParser ()
eol' :: ParsecT Void String Identity ()
eol' = ((Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
'\n' ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a b.
ParsecT Void String Identity a
-> ParsecT Void String Identity b -> ParsecT Void String Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> () -> ParsecT Void String Identity ()
forall a. a -> ParsecT Void String Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()) ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity a -> ParsecT Void String Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void String Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof)

eoe :: Char -> CSVParser ()
eoe :: Char -> ParsecT Void String Identity ()
eoe Char
c = ((Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
c ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a b.
ParsecT Void String Identity a
-> ParsecT Void String Identity b -> ParsecT Void String Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> () -> ParsecT Void String Identity ()
forall a. a -> ParsecT Void String Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()) ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity a -> ParsecT Void String Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
'\n' ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a b.
ParsecT Void String Identity a
-> ParsecT Void String Identity b -> ParsecT Void String Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> () -> ParsecT Void String Identity ()
forall a. a -> ParsecT Void String Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ()) ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity a -> ParsecT Void String Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void String Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof)

csvEntry :: Char -> Maybe Char -> CSVParser String
csvEntry :: Char -> Maybe Char -> CSVParser String
csvEntry Char
sep (Just Char
esc) = do
    String
ret <- CSVParser Char -> CSVParser String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many ((Char -> CSVParser Char
escapedChar Char
esc) CSVParser Char -> CSVParser Char -> CSVParser Char
forall a.
ParsecT Void String Identity a
-> ParsecT Void String Identity a -> ParsecT Void String Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ([Token String] -> ParsecT Void String Identity (Token String)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
noneOf (Char
sep Char -> String -> String
forall a. a -> [a] -> [a]
: String
"\n")))
    ParsecT Void String Identity (Maybe (Token String))
-> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void String Identity (Maybe (Token String))
 -> ParsecT Void String Identity ())
-> ParsecT Void String Identity (Maybe (Token String))
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity (Maybe (Token String))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
sep)
    String -> CSVParser String
forall a. a -> ParsecT Void String Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> String
removeCR String
ret)
csvEntry Char
sep Maybe Char
Nothing = do
    String
ret <- CSVParser Char -> CSVParser String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many ([Token String] -> ParsecT Void String Identity (Token String)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
noneOf (Char
sep Char -> String -> String
forall a. a -> [a] -> [a]
: String
"\n")) 
    ParsecT Void String Identity (Maybe (Token String))
-> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void String Identity (Maybe (Token String))
 -> ParsecT Void String Identity ())
-> ParsecT Void String Identity (Maybe (Token String))
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity (Maybe (Token String))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
sep)
    String -> CSVParser String
forall a. a -> ParsecT Void String Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> String
removeCR String
ret)

csvLine :: Char -> Maybe Char -> CSVParser [String]
csvLine :: Char -> Maybe Char -> CSVParser [String]
csvLine Char
sep Maybe Char
escape = do
    [String]
ret <- CSVParser String
-> ParsecT Void String Identity () -> CSVParser [String]
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill (Char -> Maybe Char -> CSVParser String
csvEntry Char
sep Maybe Char
escape) (ParsecT Void String Identity () -> ParsecT Void String Identity ()
forall a.
ParsecT Void String Identity a -> ParsecT Void String Identity a
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
lookAhead ParsecT Void String Identity ()
eol')
    ParsecT Void String Identity () -> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void String Identity ()
 -> ParsecT Void String Identity ())
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity ()
eol'
    [String] -> CSVParser [String]
forall a. a -> ParsecT Void String Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return [String]
ret

-- | The CSV parser. The first argument is the seperator and the second argument is the optional escape character. 
--
csv :: Char -> Maybe Char -> CSVParser [[String]]
csv :: Char -> Maybe Char -> CSVParser [[String]]
csv Char
seperator Maybe Char
escape = CSVParser [String]
-> ParsecT Void String Identity () -> CSVParser [[String]]
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill (Char -> Maybe Char -> CSVParser [String]
csvLine Char
seperator Maybe Char
escape) ParsecT Void String Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof