{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}

{- |
Module:      Langchain.OutputParser.Core
Copyright:   (c) 2025 Tushar Adhatrao
License:     MIT
Maintainer:  Tushar Adhatrao <tusharadhatrao@gmail.com>
Stability:   experimental

This module provides the core types and instances for output parsers in the Langchain Haskell port.
Output parsers are used to transform the raw output from language models into structured data formats,
making it easier to work with the results in downstream applications.

The 'OutputParser' typeclass defines the interface for parsing model output into specific types,
and this module provides instances for common data structures such as booleans, lists, and JSON objects.

For more information on output parsers in the original Langchain library, see:
https://python.langchain.com/docs/concepts/output_parsers/
-}
module Langchain.OutputParser.Core
  ( -- * Typeclass
    OutputParser (..)

    -- * Parsers
  , CommaSeparatedList (..)
  , JSONOutputStructure (..)
  , NumberSeparatedList (..)
  ) where

import Data.Aeson
import Data.ByteString.Char8 (fromStrict)
import Data.Char (isDigit, isSpace)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import Data.Text.Internal.Search (indices)

{- | Typeclass for parsing output from language models into specific types.
Instances of this class define how to convert a 'Text' output into a value of type 'a'.
-}
class OutputParser a where
  -- | Parse the given text into a value of type 'a'.
  -- Returns 'Left' with an error message if parsing fails, or 'Right' with the parsed value.
  parse :: Text -> Either String a

-- | Represents a list of text items separated by commas.
newtype CommaSeparatedList = CommaSeparatedList [Text]
  deriving (Int -> CommaSeparatedList -> ShowS
[CommaSeparatedList] -> ShowS
CommaSeparatedList -> String
(Int -> CommaSeparatedList -> ShowS)
-> (CommaSeparatedList -> String)
-> ([CommaSeparatedList] -> ShowS)
-> Show CommaSeparatedList
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CommaSeparatedList -> ShowS
showsPrec :: Int -> CommaSeparatedList -> ShowS
$cshow :: CommaSeparatedList -> String
show :: CommaSeparatedList -> String
$cshowList :: [CommaSeparatedList] -> ShowS
showList :: [CommaSeparatedList] -> ShowS
Show, CommaSeparatedList -> CommaSeparatedList -> Bool
(CommaSeparatedList -> CommaSeparatedList -> Bool)
-> (CommaSeparatedList -> CommaSeparatedList -> Bool)
-> Eq CommaSeparatedList
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CommaSeparatedList -> CommaSeparatedList -> Bool
== :: CommaSeparatedList -> CommaSeparatedList -> Bool
$c/= :: CommaSeparatedList -> CommaSeparatedList -> Bool
/= :: CommaSeparatedList -> CommaSeparatedList -> Bool
Eq)

instance OutputParser Bool where
  -- \| Parse a boolean value from the text.
  -- The text is considered 'True' if it contains the word "true" (case-insensitive),
  -- and 'False' if it contains "false". Otherwise, parsing fails.
  --
  -- === Examples
  --
  -- \* Parsing "true":
  --
  -- @
  -- parse "true" :: Either String Bool == Right True
  -- @
  --
  -- \* Parsing "False":
  --
  -- @
  -- parse "False" :: Either String Bool == Right False
  -- @
  --
  -- \* Parsing invalid input:
  --
  -- @
  -- parse "yes" :: Either String Bool == Left "Invalid boolean value"
  -- @
  parse :: Text -> Either String Bool
parse Text
txt = do
    let txt' :: Text
txt' = Text -> Text
T.strip (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toLower Text
txt
    if [Int] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (Text -> Text -> [Int]
indices Text
"true" Text
txt') Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
      then
        Bool -> Either String Bool
forall a b. b -> Either a b
Right Bool
True
      else
        if [Int] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (Text -> Text -> [Int]
indices Text
"false" Text
txt') Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
          then
            Bool -> Either String Bool
forall a b. b -> Either a b
Right Bool
False
          else
            String -> Either String Bool
forall a b. a -> Either a b
Left String
"Invalid boolean value"

instance OutputParser CommaSeparatedList where
  -- \| Parse a comma-separated list from the text.
  -- The text is split by commas, and each part is stripped of leading/trailing whitespace.
  --
  -- === Examples
  --
  -- \* Parsing an empty string:
  --
  -- @
  -- parse "" :: Either String CommaSeparatedList == Right (CommaSeparatedList [""])
  -- @
  --
  -- \* Parsing a single item:
  --
  -- @
  -- parse "item" :: Either String CommaSeparatedList == Right (CommaSeparatedList ["item"])
  -- @
  --
  -- \* Parsing multiple items:
  --
  -- @
  -- parse "item1,item2,item3" :: Either String CommaSeparatedList == Right (CommaSeparatedList ["item1", "item2", "item3"])
  -- @
  parse :: Text -> Either String CommaSeparatedList
parse Text
txt = CommaSeparatedList -> Either String CommaSeparatedList
forall a b. b -> Either a b
Right (CommaSeparatedList -> Either String CommaSeparatedList)
-> CommaSeparatedList -> Either String CommaSeparatedList
forall a b. (a -> b) -> a -> b
$ [Text] -> CommaSeparatedList
CommaSeparatedList ([Text] -> CommaSeparatedList) -> [Text] -> CommaSeparatedList
forall a b. (a -> b) -> a -> b
$ (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Text
T.strip ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$ HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
"," Text
txt

{- | JSON parser wrapper
Requires 'FromJSON' instance for target type. Uses Aeson for parsing.

Example data type:

@
data Person = Person
  { name :: Text
  , age :: Int
  } deriving (Show, Eq, FromJSON)
@

Usage:

>>> parse "{\"name\": \"Bob\", \"age\": 25}" :: Either String (JSONOutputStructure Person)
Right (JSONOutputStructure {jsonValue = Person {name = "Bob", age = 25}})
-}
newtype FromJSON a => JSONOutputStructure a = JSONOutputStructure
  { forall a. FromJSON a => JSONOutputStructure a -> a
jsonValue :: a
  }
  deriving (Int -> JSONOutputStructure a -> ShowS
[JSONOutputStructure a] -> ShowS
JSONOutputStructure a -> String
(Int -> JSONOutputStructure a -> ShowS)
-> (JSONOutputStructure a -> String)
-> ([JSONOutputStructure a] -> ShowS)
-> Show (JSONOutputStructure a)
forall a.
(FromJSON a, Show a) =>
Int -> JSONOutputStructure a -> ShowS
forall a. (FromJSON a, Show a) => [JSONOutputStructure a] -> ShowS
forall a. (FromJSON a, Show a) => JSONOutputStructure a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a.
(FromJSON a, Show a) =>
Int -> JSONOutputStructure a -> ShowS
showsPrec :: Int -> JSONOutputStructure a -> ShowS
$cshow :: forall a. (FromJSON a, Show a) => JSONOutputStructure a -> String
show :: JSONOutputStructure a -> String
$cshowList :: forall a. (FromJSON a, Show a) => [JSONOutputStructure a] -> ShowS
showList :: [JSONOutputStructure a] -> ShowS
Show, JSONOutputStructure a -> JSONOutputStructure a -> Bool
(JSONOutputStructure a -> JSONOutputStructure a -> Bool)
-> (JSONOutputStructure a -> JSONOutputStructure a -> Bool)
-> Eq (JSONOutputStructure a)
forall a.
Eq a =>
JSONOutputStructure a -> JSONOutputStructure a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall a.
Eq a =>
JSONOutputStructure a -> JSONOutputStructure a -> Bool
== :: JSONOutputStructure a -> JSONOutputStructure a -> Bool
$c/= :: forall a.
Eq a =>
JSONOutputStructure a -> JSONOutputStructure a -> Bool
/= :: JSONOutputStructure a -> JSONOutputStructure a -> Bool
Eq, Maybe (JSONOutputStructure a)
Value -> Parser [JSONOutputStructure a]
Value -> Parser (JSONOutputStructure a)
(Value -> Parser (JSONOutputStructure a))
-> (Value -> Parser [JSONOutputStructure a])
-> Maybe (JSONOutputStructure a)
-> FromJSON (JSONOutputStructure a)
forall a. FromJSON a => Maybe (JSONOutputStructure a)
forall a. FromJSON a => Value -> Parser [JSONOutputStructure a]
forall a. FromJSON a => Value -> Parser (JSONOutputStructure a)
forall a.
(Value -> Parser a)
-> (Value -> Parser [a]) -> Maybe a -> FromJSON a
$cparseJSON :: forall a. FromJSON a => Value -> Parser (JSONOutputStructure a)
parseJSON :: Value -> Parser (JSONOutputStructure a)
$cparseJSONList :: forall a. FromJSON a => Value -> Parser [JSONOutputStructure a]
parseJSONList :: Value -> Parser [JSONOutputStructure a]
$comittedField :: forall a. FromJSON a => Maybe (JSONOutputStructure a)
omittedField :: Maybe (JSONOutputStructure a)
FromJSON)

-- | Instance for parsing JSON into any type that implements FromJSON.
instance FromJSON a => OutputParser (JSONOutputStructure a) where
  parse :: Text -> Either String (JSONOutputStructure a)
parse Text
txt =
    case ByteString -> Either String (JSONOutputStructure a)
forall a. FromJSON a => ByteString -> Either String a
eitherDecode (ByteString -> ByteString
fromStrict (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Text -> ByteString
encodeUtf8 Text
txt) of
      Left String
err -> String -> Either String (JSONOutputStructure a)
forall a b. a -> Either a b
Left (String -> Either String (JSONOutputStructure a))
-> String -> Either String (JSONOutputStructure a)
forall a b. (a -> b) -> a -> b
$ String
"JSON parsing error: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
err
      Right JSONOutputStructure a
val -> JSONOutputStructure a -> Either String (JSONOutputStructure a)
forall a b. b -> Either a b
Right JSONOutputStructure a
val

-- | Represents a list of text items separated by numbered prefixes, like "1. First item".
newtype NumberSeparatedList = NumberSeparatedList [Text]
  deriving (Int -> NumberSeparatedList -> ShowS
[NumberSeparatedList] -> ShowS
NumberSeparatedList -> String
(Int -> NumberSeparatedList -> ShowS)
-> (NumberSeparatedList -> String)
-> ([NumberSeparatedList] -> ShowS)
-> Show NumberSeparatedList
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> NumberSeparatedList -> ShowS
showsPrec :: Int -> NumberSeparatedList -> ShowS
$cshow :: NumberSeparatedList -> String
show :: NumberSeparatedList -> String
$cshowList :: [NumberSeparatedList] -> ShowS
showList :: [NumberSeparatedList] -> ShowS
Show, NumberSeparatedList -> NumberSeparatedList -> Bool
(NumberSeparatedList -> NumberSeparatedList -> Bool)
-> (NumberSeparatedList -> NumberSeparatedList -> Bool)
-> Eq NumberSeparatedList
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: NumberSeparatedList -> NumberSeparatedList -> Bool
== :: NumberSeparatedList -> NumberSeparatedList -> Bool
$c/= :: NumberSeparatedList -> NumberSeparatedList -> Bool
/= :: NumberSeparatedList -> NumberSeparatedList -> Bool
Eq)

instance OutputParser NumberSeparatedList where
  -- \| Parse a numbered list from the text.
  -- The input is expected to be a list of items prefixed with numbers followed by dots,
  -- such as "1. First item\n2. Second item". Whitespace is trimmed from each item.
  --
  -- === Examples
  --
  -- \* Parsing a simple numbered list:
  --
  -- @
  -- parse "1. First item\n2. Second item\n3. Third item" :: Either String NumberSeparatedList == Right (NumberSeparatedList ["First item", "Second item", "Third item"])
  -- @
  --
  -- \* Handling whitespace:
  --
  -- @
  -- parse "1.   First item  \n  2.  Second item\n3. Third item" :: Either String NumberSeparatedList == Right (NumberSeparatedList ["First item", "Second item", "Third item"])
  -- @
  --
  -- \* Handling multi-digit numbers:
  --
  -- @
  -- parse "10. First item\n11. Second item\n12. Third item" :: Either String NumberSeparatedList == Right (NumberSeparatedList ["First item", "Second item", "Third item"])
  -- @
  parse :: Text -> Either String NumberSeparatedList
parse Text
txt =
    let s :: String
s = ShowS
trim (Text -> String
T.unpack Text
txt)
     in case String -> Maybe String
dropUntilAndConsumeBoundary String
s of
          Maybe String
Nothing -> String -> Either String NumberSeparatedList
forall a b. a -> Either a b
Left String
"No valid numbered items found"
          Just String
rest ->
            -- Parse the rest into items and wrap them in our newtype.
            NumberSeparatedList -> Either String NumberSeparatedList
forall a b. b -> Either a b
Right (NumberSeparatedList -> Either String NumberSeparatedList)
-> ([String] -> NumberSeparatedList)
-> [String]
-> Either String NumberSeparatedList
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> NumberSeparatedList
NumberSeparatedList ([Text] -> NumberSeparatedList)
-> ([String] -> [Text]) -> [String] -> NumberSeparatedList
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Text) -> [String] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (String -> Text
T.pack (String -> Text) -> ShowS -> String -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
trim) ([String] -> Either String NumberSeparatedList)
-> [String] -> Either String NumberSeparatedList
forall a b. (a -> b) -> a -> b
$ String -> [String]
parseItems String
rest

{- | Drops noise until we find a valid boundary marker (number with dot)
and then consumes the marker.
-}
dropUntilAndConsumeBoundary :: String -> Maybe String
dropUntilAndConsumeBoundary :: String -> Maybe String
dropUntilAndConsumeBoundary String
s =
  case String -> Maybe (Int, Int)
findBoundary String
s of
    Maybe (Int, Int)
Nothing -> Maybe String
forall a. Maybe a
Nothing
    Just (Int
idx, Int
n) -> String -> Maybe String
forall a. a -> Maybe a
Just (Int -> ShowS
forall a. Int -> [a] -> [a]
drop (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n) String
s)

{- | Scans the string for the next occurrence of a valid boundary.
Returns the index and length of the marker.
-}
findBoundary :: String -> Maybe (Int, Int)
findBoundary :: String -> Maybe (Int, Int)
findBoundary String
s = Int -> String -> Maybe (Int, Int)
forall {t}. Num t => t -> String -> Maybe (t, Int)
go Int
0 String
s
  where
    go :: t -> String -> Maybe (t, Int)
go t
_ [] = Maybe (t, Int)
forall a. Maybe a
Nothing
    go t
i xs :: String
xs@(Char
_ : String
rest) =
      case String -> Maybe Int
isBoundaryAt String
xs of
        Just Int
n -> (t, Int) -> Maybe (t, Int)
forall a. a -> Maybe a
Just (t
i, Int
n)
        Maybe Int
Nothing -> t -> String -> Maybe (t, Int)
go (t
i t -> t -> t
forall a. Num a => a -> a -> a
+ t
1) String
rest

{- | Checks if the given string starts with a valid boundary.
A valid boundary has one or more digits, optional spaces,
a dot, then optional spaces. Returns the number of characters
consumed by the boundary if matched.
-}
isBoundaryAt :: String -> Maybe Int
isBoundaryAt :: String -> Maybe Int
isBoundaryAt String
s =
  let (String
digits, String
rest1) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
s
   in if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
digits
        then Maybe Int
forall a. Maybe a
Nothing
        else
          let (String
spaces, String
rest2) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isSpace String
rest1
           in case String
rest2 of
                (Char
c : String
rest3)
                  | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' ->
                      let (String
spaces2, String
_) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isSpace String
rest3
                       in Int -> Maybe Int
forall a. a -> Maybe a
Just (String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
digits Int -> Int -> Int
forall a. Num a => a -> a -> a
+ String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
spaces Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ String -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
spaces2)
                String
_ -> Maybe Int
forall a. Maybe a
Nothing

-- | Recursively splits the string into items using the boundary markers.
parseItems :: String -> [String]
parseItems :: String -> [String]
parseItems String
s =
  case String -> Maybe (Int, Int)
findBoundary String
s of
    Maybe (Int, Int)
Nothing -> [String
s] -- No further boundaries; the rest is one item.
    Just (Int
idx, Int
n) ->
      let item :: String
item = Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
idx String
s
          rest :: String
rest = Int -> ShowS
forall a. Int -> [a] -> [a]
drop (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n) String
s
       in String
item String -> [String] -> [String]
forall a. a -> [a] -> [a]
: String -> [String]
parseItems String
rest

-- | A simple trim function to remove leading and trailing whitespace.
trim :: String -> String
trim :: ShowS
trim = ShowS
f ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
f
  where
    f :: ShowS
f = ShowS
forall a. [a] -> [a]
reverse ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace