{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.Pptx.SmartArt
   Copyright   : © 2025 Anton Antic
   License     : GNU GPL, version 2 or above

   Maintainer  : Anton Antic <anton@everworker.ai>
   Stability   : alpha
   Portability : portable

SmartArt diagram parsing and text extraction for PPTX.
-}
module Text.Pandoc.Readers.Pptx.SmartArt
  ( PptxDiagram(..)
  , parseDiagram
  , diagramToBlocks
  ) where

import Codec.Archive.Zip (Archive, findEntryByPath, fromEntry)
import qualified Data.Map.Strict as M
import Data.Maybe (mapMaybe)
import qualified Data.Text as T
import qualified Data.Text.Lazy.Encoding as TL
import Data.Text (Text)
import Text.Pandoc.Definition
import Text.Pandoc.Readers.OOXML.Shared
import Text.Pandoc.XML.Light

-- | SmartArt diagram data
data PptxDiagram = PptxDiagram
  { PptxDiagram -> Text
diagramType :: Text               -- Layout type (chevron, cycle, etc.)
  , PptxDiagram -> [(Text, [Text])]
diagramNodes :: [(Text, [Text])]  -- (nodeText, childTexts)
  } deriving (Int -> PptxDiagram -> ShowS
[PptxDiagram] -> ShowS
PptxDiagram -> [Char]
(Int -> PptxDiagram -> ShowS)
-> (PptxDiagram -> [Char])
-> ([PptxDiagram] -> ShowS)
-> Show PptxDiagram
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PptxDiagram -> ShowS
showsPrec :: Int -> PptxDiagram -> ShowS
$cshow :: PptxDiagram -> [Char]
show :: PptxDiagram -> [Char]
$cshowList :: [PptxDiagram] -> ShowS
showList :: [PptxDiagram] -> ShowS
Show)

-- | Parse SmartArt diagram from relationship IDs
parseDiagram :: Archive
             -> [(Text, Text)]  -- Slide relationships
             -> Text            -- data relationship ID
             -> Text            -- layout relationship ID
             -> Either Text PptxDiagram
parseDiagram :: Archive
-> [(Text, Text)] -> Text -> Text -> Either Text PptxDiagram
parseDiagram Archive
archive [(Text, Text)]
rels Text
dataRelId Text
layoutRelId = do
  -- Resolve relationships to file paths
  Text
dataTarget <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Relationship not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
dataRelId) (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
                Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
dataRelId [(Text, Text)]
rels
  Text
layoutTarget <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Relationship not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
layoutRelId) (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
                  Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
layoutRelId [(Text, Text)]
rels

  -- Resolve relative paths (diagrams are in ../diagrams/ from slides/)
  let dataPath :: [Char]
dataPath = Text -> [Char]
resolveDiagramPath Text
dataTarget
      layoutPath :: [Char]
layoutPath = Text -> [Char]
resolveDiagramPath Text
layoutTarget

  -- Load XML files
  Element
dataElem <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
dataPath
  Element
layoutElem <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
layoutPath

  -- Extract layout type
  Text
layoutType <- Element -> Either Text Text
extractLayoutType Element
layoutElem

  -- Extract text nodes with hierarchy
  [(Text, [Text])]
nodes <- Element -> Either Text [(Text, [Text])]
extractDiagramNodes Element
dataElem

  PptxDiagram -> Either Text PptxDiagram
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (PptxDiagram -> Either Text PptxDiagram)
-> PptxDiagram -> Either Text PptxDiagram
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, [Text])] -> PptxDiagram
PptxDiagram Text
layoutType [(Text, [Text])]
nodes

-- | Resolve diagram path (handle ../diagrams/ relative paths)
resolveDiagramPath :: Text -> FilePath
resolveDiagramPath :: Text -> [Char]
resolveDiagramPath Text
target =
  if Text
"../diagrams/" Text -> Text -> Bool
`T.isPrefixOf` Text
target
    then [Char]
"ppt/diagrams/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack (Int -> Text -> Text
T.drop Int
12 Text
target)  -- "../diagrams/" = 12 chars
    else Text -> [Char]
T.unpack Text
target

-- | Load XML from archive
loadXMLFromArchive :: Archive -> FilePath -> Either Text Element
loadXMLFromArchive :: Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
path =
  case [Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
path Archive
archive of
    Maybe Entry
Nothing -> Text -> Either Text Element
forall a b. a -> Either a b
Left (Text -> Either Text Element) -> Text -> Either Text Element
forall a b. (a -> b) -> a -> b
$ Text
"File not found in archive: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
path
    Just Entry
entry ->
      let xmlBytes :: ByteString
xmlBytes = Entry -> ByteString
fromEntry Entry
entry
          lazyText :: Text
lazyText = ByteString -> Text
TL.decodeUtf8 ByteString
xmlBytes
       in Text -> Either Text Element
parseXMLElement Text
lazyText

-- | Extract layout type from layout XML
extractLayoutType :: Element -> Either Text Text
extractLayoutType :: Element -> Either Text Text
extractLayoutType Element
layoutElem = do
  -- Look for uniqueId attribute: "urn:.../layout/chevron2"
  case QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"uniqueId") Element
layoutElem of
    Just Text
uid ->
      -- Extract last part after last /
      let layoutName :: Text
layoutName = (Char -> Bool) -> Text -> Text
T.takeWhileEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/') Text
uid
       in Text -> Either Text Text
forall a b. b -> Either a b
Right Text
layoutName
    Maybe Text
Nothing ->
      -- Fallback: look for title
      case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"dgm" Text
"title" Element
layoutElem Maybe Element -> (Element -> Maybe Text) -> Maybe Text
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
           QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"val") of
        Just Text
title -> Text -> Either Text Text
forall a b. b -> Either a b
Right Text
title
        Maybe Text
Nothing -> Text -> Either Text Text
forall a b. b -> Either a b
Right Text
"unknown"
  where
    ns :: NameSpaces
ns = Element -> NameSpaces
elemToNameSpaces Element
layoutElem

-- | Extract text nodes from diagram data
extractDiagramNodes :: Element -> Either Text [(Text, [Text])]
extractDiagramNodes :: Element -> Either Text [(Text, [Text])]
extractDiagramNodes Element
dataElem = do
  let ns :: NameSpaces
ns = Element -> NameSpaces
elemToNameSpaces Element
dataElem

  -- Find point list
  Element
ptLst <- Text -> Maybe Element -> Either Text Element
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing dgm:ptLst" (Maybe Element -> Either Text Element)
-> Maybe Element -> Either Text Element
forall a b. (a -> b) -> a -> b
$
           NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"dgm" Text
"ptLst" Element
dataElem

  let ptElems :: [Element]
ptElems = NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName NameSpaces
ns Text
"dgm" Text
"pt" Element
ptLst

  -- Build node map: modelId → text
  let nodeMap :: NameSpaces
nodeMap = [(Text, Text)] -> NameSpaces
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, Text)] -> NameSpaces) -> [(Text, Text)] -> NameSpaces
forall a b. (a -> b) -> a -> b
$ (Element -> Maybe (Text, Text)) -> [Element] -> [(Text, Text)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (NameSpaces -> Element -> Maybe (Text, Text)
extractNodeText NameSpaces
ns) [Element]
ptElems

  -- Parse connections
  let cxnLst :: Maybe Element
cxnLst = NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"dgm" Text
"cxnLst" Element
dataElem
      connections :: [Connection]
connections = [Connection]
-> (Element -> [Connection]) -> Maybe Element -> [Connection]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] (NameSpaces -> Element -> [Connection]
parseConnections NameSpaces
ns) Maybe Element
cxnLst

  -- Build parent-child map
  let parentMap :: Map Text [Text]
parentMap = [Connection] -> Map Text [Text]
buildParentMap [Connection]
connections

  -- Find parent nodes (nodes that have children)
  let parentIds :: [Text]
parentIds = Map Text [Text] -> [Text]
forall k a. Map k a -> [k]
M.keys Map Text [Text]
parentMap

  -- Build hierarchy - only show nodes that are parents
  -- (children are shown under their parents)
  let hierarchy :: [(Text, [Text])]
hierarchy = (Text -> (Text, [Text])) -> [Text] -> [(Text, [Text])]
forall a b. (a -> b) -> [a] -> [b]
map (NameSpaces -> Map Text [Text] -> Text -> (Text, [Text])
buildNodeWithChildren NameSpaces
nodeMap Map Text [Text]
parentMap) [Text]
parentIds
      -- Filter out nodes with empty text (presentation nodes)
      validHierarchy :: [(Text, [Text])]
validHierarchy = ((Text, [Text]) -> Bool) -> [(Text, [Text])] -> [(Text, [Text])]
forall a. (a -> Bool) -> [a] -> [a]
filter (\(Text
nodeText, [Text]
_) -> Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Bool
T.null Text
nodeText) [(Text, [Text])]
hierarchy

  [(Text, [Text])] -> Either Text [(Text, [Text])]
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return [(Text, [Text])]
validHierarchy

-- | Extract text from a point element (returns Nothing if no text)
extractNodeText :: NameSpaces -> Element -> Maybe (Text, Text)
extractNodeText :: NameSpaces -> Element -> Maybe (Text, Text)
extractNodeText NameSpaces
ns Element
ptElem = do
  Text
modelId <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"modelId") Element
ptElem

  -- Extract text from dgm:t element (which contains a:p/a:r/a:t)
  let text :: Text
text = case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"dgm" Text
"t" Element
ptElem of
        Just Element
tElem ->
          -- Recursively get ALL text content from all descendants
          Element -> Text
getAllText Element
tElem
        Maybe Element
Nothing -> Text
""

  -- Only return nodes with actual text
  if Text -> Bool
T.null (Text -> Text
T.strip Text
text)
    then Maybe (Text, Text)
forall a. Maybe a
Nothing
    else (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
modelId, Text
text)

-- | Connection between nodes
data Connection = Connection
  { Connection -> Text
connType :: Text
  , Connection -> Text
connSrc  :: Text
  , Connection -> Text
connDest :: Text
  } deriving (Int -> Connection -> ShowS
[Connection] -> ShowS
Connection -> [Char]
(Int -> Connection -> ShowS)
-> (Connection -> [Char])
-> ([Connection] -> ShowS)
-> Show Connection
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Connection -> ShowS
showsPrec :: Int -> Connection -> ShowS
$cshow :: Connection -> [Char]
show :: Connection -> [Char]
$cshowList :: [Connection] -> ShowS
showList :: [Connection] -> ShowS
Show)

-- | Parse connections
parseConnections :: NameSpaces -> Element -> [Connection]
parseConnections :: NameSpaces -> Element -> [Connection]
parseConnections NameSpaces
ns Element
cxnLst =
  let cxnElems :: [Element]
cxnElems = NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName NameSpaces
ns Text
"dgm" Text
"cxn" Element
cxnLst
   in (Element -> Maybe Connection) -> [Element] -> [Connection]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (NameSpaces -> Element -> Maybe Connection
parseConnection NameSpaces
ns) [Element]
cxnElems

parseConnection :: NameSpaces -> Element -> Maybe Connection
parseConnection :: NameSpaces -> Element -> Maybe Connection
parseConnection NameSpaces
_ns Element
cxnElem = do
  let cxnType :: Text
cxnType = Text -> (Text -> Text) -> Maybe Text -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" Text -> Text
forall a. a -> a
id (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"type") Element
cxnElem  -- Empty if no type
  Text
srcId <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"srcId") Element
cxnElem
  Text
destId <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"destId") Element
cxnElem
  Connection -> Maybe Connection
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Connection -> Maybe Connection) -> Connection -> Maybe Connection
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text -> Connection
Connection Text
cxnType Text
srcId Text
destId

-- | Build parent-child map from connections
-- Use connections WITHOUT a type attribute (these are the data hierarchy)
buildParentMap :: [Connection] -> M.Map Text [Text]
buildParentMap :: [Connection] -> Map Text [Text]
buildParentMap [Connection]
connections =
  let dataConnections :: [Connection]
dataConnections = (Connection -> Bool) -> [Connection] -> [Connection]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Connection
c -> Text -> Bool
T.null (Connection -> Text
connType Connection
c)) [Connection]
connections
   in (Connection -> Map Text [Text] -> Map Text [Text])
-> Map Text [Text] -> [Connection] -> Map Text [Text]
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Connection -> Map Text [Text] -> Map Text [Text]
addConn Map Text [Text]
forall k a. Map k a
M.empty [Connection]
dataConnections
  where
    addConn :: Connection -> Map Text [Text] -> Map Text [Text]
addConn Connection
conn Map Text [Text]
m = ([Text] -> [Text] -> [Text])
-> Text -> [Text] -> Map Text [Text] -> Map Text [Text]
forall k a. Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a
M.insertWith [Text] -> [Text] -> [Text]
forall a. [a] -> [a] -> [a]
(++) (Connection -> Text
connSrc Connection
conn) [Connection -> Text
connDest Connection
conn] Map Text [Text]
m

-- | Build node with its children
buildNodeWithChildren :: M.Map Text Text -> M.Map Text [Text] -> Text -> (Text, [Text])
buildNodeWithChildren :: NameSpaces -> Map Text [Text] -> Text -> (Text, [Text])
buildNodeWithChildren NameSpaces
nodeMap Map Text [Text]
parentMap Text
nodeId =
  let nodeText :: Text
nodeText = Text -> Text -> NameSpaces -> Text
forall k a. Ord k => a -> k -> Map k a -> a
M.findWithDefault Text
"" Text
nodeId NameSpaces
nodeMap
      childIds :: [Text]
childIds = [Text] -> Text -> Map Text [Text] -> [Text]
forall k a. Ord k => a -> k -> Map k a -> a
M.findWithDefault [] Text
nodeId Map Text [Text]
parentMap
      -- Only include children that have text
      childTexts :: [Text]
childTexts = (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) ([Text] -> [Text]) -> [Text] -> [Text]
forall a b. (a -> b) -> a -> b
$
                   (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
cid -> Text -> Text -> NameSpaces -> Text
forall k a. Ord k => a -> k -> Map k a -> a
M.findWithDefault Text
"" Text
cid NameSpaces
nodeMap) [Text]
childIds
   in (Text
nodeText, [Text]
childTexts)

-- | Convert diagram to Pandoc blocks
diagramToBlocks :: PptxDiagram -> [Block]
diagramToBlocks :: PptxDiagram -> [Block]
diagramToBlocks PptxDiagram
diagram =
  let nodes :: [(Text, [Text])]
nodes = PptxDiagram -> [(Text, [Text])]
diagramNodes PptxDiagram
diagram
      layoutType :: Text
layoutType = PptxDiagram -> Text
diagramType PptxDiagram
diagram

      -- Build content blocks
      contentBlocks :: [Block]
contentBlocks = ((Text, [Text]) -> [Block]) -> [(Text, [Text])] -> [Block]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Text, [Text]) -> [Block]
nodeToBlocks [(Text, [Text])]
nodes

   in [Attr -> [Block] -> Block
Div (Text
"", [Text
"smartart", Text
layoutType], [(Text
"layout", Text
layoutType)])
           [Block]
contentBlocks]

-- | Convert node to blocks
nodeToBlocks :: (Text, [Text]) -> [Block]
nodeToBlocks :: (Text, [Text]) -> [Block]
nodeToBlocks (Text
nodeText, [Text]
childTexts) =
  if [Text] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Text]
childTexts
    then [[Inline] -> Block
Para [[Inline] -> Inline
Strong [Text -> Inline
Str Text
nodeText]]]
    else [ [Inline] -> Block
Para [[Inline] -> Inline
Strong [Text -> Inline
Str Text
nodeText]]
         , [[Block]] -> Block
BulletList [[[Inline] -> Block
Plain [Text -> Inline
Str Text
child]] | Text
child <- [Text]
childTexts]
         ]

-- | Recursively extract all text from an element and its descendants
getAllText :: Element -> Text
getAllText :: Element -> Text
getAllText Element
el =
  let textFromContent :: Content -> Text
textFromContent (Text CData
cdata) = CData -> Text
cdData CData
cdata
      textFromContent (Elem Element
e) = Element -> Text
getAllText Element
e
      textFromContent Content
_ = Text
""
      texts :: [Text]
texts = (Content -> Text) -> [Content] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Content -> Text
textFromContent (Element -> [Content]
elContent Element
el)
   in [Text] -> Text
T.unwords ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text]
texts

-- Helper functions
maybeToEither :: Text -> Maybe a -> Either Text a
maybeToEither :: forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
err Maybe a
Nothing = Text -> Either Text a
forall a b. a -> Either a b
Left Text
err
maybeToEither Text
_ (Just a
x) = a -> Either Text a
forall a b. b -> Either a b
Right a
x