{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-partial-fields #-}
{- |
   Module      : Text.Pandoc.Readers.Pptx.Shapes
   Copyright   : © 2025 Anton Antic
   License     : GNU GPL, version 2 or above

   Maintainer  : Anton Antic <anton@everworker.ai>
   Stability   : alpha
   Portability : portable

Parsing of PPTX shapes (text boxes, images, tables, diagrams).
-}
module Text.Pandoc.Readers.Pptx.Shapes
  ( PptxShape(..)
  , PptxParagraph(..)
  , BulletType(..)
  , parseShapes
  , parseShape
  , shapeToBlocks
  , isTitlePlaceholder
  , extractDrawingMLText
  ) where

import Codec.Archive.Zip (Archive, findEntryByPath, fromEntry)
import qualified Data.ByteString.Lazy as B
import Data.List (find, groupBy)
import Data.Maybe (mapMaybe)
import qualified Data.Text as T
import Data.Text (Text)
import Text.Read (readMaybe)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import qualified Text.Pandoc.Class.PandocMonad as P
import Text.Pandoc.Definition
import Text.Pandoc.Readers.OOXML.Shared
import Text.Pandoc.Readers.Pptx.SmartArt
import Text.Pandoc.XML.Light

-- | Paragraph with bullet/numbering information
data PptxParagraph = PptxParagraph
  { PptxParagraph -> Int
paraLevel   :: Int            -- Bullet level (0, 1, 2...)
  , PptxParagraph -> BulletType
paraBullet  :: BulletType
  , PptxParagraph -> Text
paraText    :: Text
  } deriving (Int -> PptxParagraph -> ShowS
[PptxParagraph] -> ShowS
PptxParagraph -> String
(Int -> PptxParagraph -> ShowS)
-> (PptxParagraph -> String)
-> ([PptxParagraph] -> ShowS)
-> Show PptxParagraph
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PptxParagraph -> ShowS
showsPrec :: Int -> PptxParagraph -> ShowS
$cshow :: PptxParagraph -> String
show :: PptxParagraph -> String
$cshowList :: [PptxParagraph] -> ShowS
showList :: [PptxParagraph] -> ShowS
Show)

-- | Bullet type
data BulletType
  = NoBullet
  | Bullet                        -- Has bullet (character detected or implicit)
  | WingdingsBullet              -- Detected via Wingdings symbol
  deriving (Int -> BulletType -> ShowS
[BulletType] -> ShowS
BulletType -> String
(Int -> BulletType -> ShowS)
-> (BulletType -> String)
-> ([BulletType] -> ShowS)
-> Show BulletType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> BulletType -> ShowS
showsPrec :: Int -> BulletType -> ShowS
$cshow :: BulletType -> String
show :: BulletType -> String
$cshowList :: [BulletType] -> ShowS
showList :: [BulletType] -> ShowS
Show, BulletType -> BulletType -> Bool
(BulletType -> BulletType -> Bool)
-> (BulletType -> BulletType -> Bool) -> Eq BulletType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: BulletType -> BulletType -> Bool
== :: BulletType -> BulletType -> Bool
$c/= :: BulletType -> BulletType -> Bool
/= :: BulletType -> BulletType -> Bool
Eq)

-- | Shape types in PPTX slides
data PptxShape
  = PptxTextBox [PptxParagraph]         -- Parsed paragraphs with bullet info
  | PptxPicture
      { PptxShape -> Text
picRelId  :: Text               -- Relationship ID (lazy loading)
      , PptxShape -> Text
picTitle  :: Text
      , PptxShape -> Text
picAlt    :: Text
      }
  | PptxTable [[Text]]                  -- Simple text cells for now
  | PptxDiagramRef
      { PptxShape -> Text
dgmDataRelId   :: Text          -- Relationship to data.xml
      , PptxShape -> Text
dgmLayoutRelId :: Text          -- Relationship to layout.xml
      }
  | PptxGraphic Text                    -- Placeholder for other graphics
  deriving (Int -> PptxShape -> ShowS
[PptxShape] -> ShowS
PptxShape -> String
(Int -> PptxShape -> ShowS)
-> (PptxShape -> String)
-> ([PptxShape] -> ShowS)
-> Show PptxShape
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PptxShape -> ShowS
showsPrec :: Int -> PptxShape -> ShowS
$cshow :: PptxShape -> String
show :: PptxShape -> String
$cshowList :: [PptxShape] -> ShowS
showList :: [PptxShape] -> ShowS
Show)

-- | Parse all shapes from shape tree
parseShapes :: NameSpaces -> Element -> [PptxShape]
parseShapes :: NameSpaces -> Element -> [PptxShape]
parseShapes NameSpaces
ns Element
spTreeElem =
  let shapeElems :: [Element]
shapeElems = [Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
spTreeElem
      -- Merge parent namespaces with element namespaces
      ns' :: NameSpaces
ns' = NameSpaces
ns NameSpaces -> NameSpaces -> NameSpaces
forall a. Semigroup a => a -> a -> a
<> Element -> NameSpaces
elemToNameSpaces Element
spTreeElem
   in (Element -> Maybe PptxShape) -> [Element] -> [PptxShape]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (NameSpaces -> Element -> Maybe PptxShape
parseShape NameSpaces
ns') [Element]
shapeElems

-- | Parse individual shape element
parseShape :: NameSpaces -> Element -> Maybe PptxShape
parseShape :: NameSpaces -> Element -> Maybe PptxShape
parseShape NameSpaces
ns Element
el
  -- Text box: <p:sp> with <p:txBody>
  | NameSpaces -> Text -> Text -> Element -> Bool
isElem NameSpaces
ns Text
"p" Text
"sp" Element
el =
      case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"txBody" Element
el of
        Just Element
txBody ->
          let paras :: [PptxParagraph]
paras = NameSpaces -> Element -> [PptxParagraph]
parseParagraphs NameSpaces
ns Element
txBody
           in if [PptxParagraph] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [PptxParagraph]
paras
              then Maybe PptxShape
forall a. Maybe a
Nothing
              else PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
Just (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ [PptxParagraph] -> PptxShape
PptxTextBox [PptxParagraph]
paras
        Maybe Element
Nothing -> Maybe PptxShape
forall a. Maybe a
Nothing

  -- Picture: <p:pic>
  | NameSpaces -> Text -> Text -> Element -> Bool
isElem NameSpaces
ns Text
"p" Text
"pic" Element
el = do
      Element
nvPicPr <- NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"nvPicPr" Element
el
      Element
cNvPr <- NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"cNvPr" Element
nvPicPr

      let title :: Text
title = Text -> (Text -> Text) -> Maybe Text -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" Text -> Text
forall a. a -> a
id (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"name") Element
cNvPr
          alt :: Text
alt = Text -> (Text -> Text) -> Maybe Text -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" Text -> Text
forall a. a -> a
id (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"descr") Element
cNvPr

      -- Get blip relationship ID
      Element
blipFill <- NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"blipFill" Element
el
      Element
blip <- NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"blip" Element
blipFill
      Text
relId <- NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName NameSpaces
ns Text
"r" Text
"embed" Element
blip

      PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text -> PptxShape
PptxPicture Text
relId Text
title Text
alt

  -- GraphicFrame: table or diagram
  | NameSpaces -> Text -> Text -> Element -> Bool
isElem NameSpaces
ns Text
"p" Text
"graphicFrame" Element
el =
      case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"graphic" Element
el Maybe Element -> (Element -> Maybe Element) -> Maybe Element
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
           NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"graphicData" of
        Maybe Element
Nothing -> Maybe PptxShape
forall a. Maybe a
Nothing
        Just Element
graphicData ->
          case QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"uri") Element
graphicData of
            Maybe Text
Nothing -> PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
Just (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ Text -> PptxShape
PptxGraphic Text
"no-uri"
            Just Text
uri ->
              if Text
"table" Text -> Text -> Bool
`T.isInfixOf` Text
uri
                then
                  -- Table
                  case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"tbl" Element
graphicData of
                    Just Element
tbl ->
                      let rows :: [[Text]]
rows = NameSpaces -> Element -> [[Text]]
parseTableRows NameSpaces
ns Element
tbl
                       in PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
Just (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ [[Text]] -> PptxShape
PptxTable [[Text]]
rows
                    Maybe Element
Nothing -> Maybe PptxShape
forall a. Maybe a
Nothing
                else if Text
"diagram" Text -> Text -> Bool
`T.isInfixOf` Text
uri
                  then
                    -- SmartArt diagram - dgm namespace is declared inline on relIds element
                    let dgmRelIds :: Maybe Element
dgmRelIds = (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"relIds") (Element -> [Element]
elChildren Element
graphicData)
                     in case Maybe Element
dgmRelIds of
                          Maybe Element
Nothing -> PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
Just (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ Text -> PptxShape
PptxGraphic Text
"diagram-no-relIds"
                          Just Element
relIdsElem ->
                            -- Get r:dm and r:lo attributes (r namespace is in parent)
                            let ns' :: NameSpaces
ns' = NameSpaces
ns NameSpaces -> NameSpaces -> NameSpaces
forall a. Semigroup a => a -> a -> a
<> Element -> NameSpaces
elemToNameSpaces Element
relIdsElem
                             in case (NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName NameSpaces
ns' Text
"r" Text
"dm" Element
relIdsElem,
                                      NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName NameSpaces
ns' Text
"r" Text
"lo" Element
relIdsElem) of
                                  (Just Text
dataRelId, Just Text
layoutRelId) ->
                                    PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
Just (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ Text -> Text -> PptxShape
PptxDiagramRef Text
dataRelId Text
layoutRelId
                                  (Maybe Text, Maybe Text)
_ -> PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
Just (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ Text -> PptxShape
PptxGraphic Text
"diagram-missing-rels"
                  else
                    -- Other graphic (chart, etc.)
                    PptxShape -> Maybe PptxShape
forall a. a -> Maybe a
Just (PptxShape -> Maybe PptxShape) -> PptxShape -> Maybe PptxShape
forall a b. (a -> b) -> a -> b
$ Text -> PptxShape
PptxGraphic (Text
"other: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
uri)

  -- Skip other shapes for now
  | Bool
otherwise = Maybe PptxShape
forall a. Maybe a
Nothing

-- | Parse table rows (simple text extraction)
parseTableRows :: NameSpaces -> Element -> [[Text]]
parseTableRows :: NameSpaces -> Element -> [[Text]]
parseTableRows NameSpaces
ns Element
tblElem =
  let trElems :: [Element]
trElems = NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName NameSpaces
ns Text
"a" Text
"tr" Element
tblElem
   in (Element -> [Text]) -> [Element] -> [[Text]]
forall a b. (a -> b) -> [a] -> [b]
map (NameSpaces -> Element -> [Text]
parseTableRow NameSpaces
ns) [Element]
trElems

parseTableRow :: NameSpaces -> Element -> [Text]
parseTableRow :: NameSpaces -> Element -> [Text]
parseTableRow NameSpaces
ns Element
trElem =
  let tcElems :: [Element]
tcElems = NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName NameSpaces
ns Text
"a" Text
"tc" Element
trElem
   in (Element -> Text) -> [Element] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Element -> Text
extractCellText [Element]
tcElems
  where
    extractCellText :: Element -> Text
extractCellText Element
tcElem =
      -- Get text from txBody/a:p/a:r/a:t
      case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"txBody" Element
tcElem of
        Just Element
txBody -> Element -> Text
extractDrawingMLText Element
txBody
        Maybe Element
Nothing -> Text
""

-- | Convert shape to Pandoc blocks
shapeToBlocks :: PandocMonad m => Archive -> [(Text, Text)] -> PptxShape -> m [Block]
shapeToBlocks :: forall (m :: * -> *).
PandocMonad m =>
Archive -> [(Text, Text)] -> PptxShape -> m [Block]
shapeToBlocks Archive
_archive [(Text, Text)]
_rels (PptxTextBox [PptxParagraph]
paras) =
  [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Block] -> m [Block]) -> [Block] -> m [Block]
forall a b. (a -> b) -> a -> b
$ [PptxParagraph] -> [Block]
paragraphsToBlocks [PptxParagraph]
paras
shapeToBlocks Archive
archive [(Text, Text)]
rels (PptxPicture Text
relId Text
title Text
alt) = do
  -- Resolve relationship to get media path
  case Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
relId [(Text, Text)]
rels of
    Maybe Text
Nothing -> [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return []  -- Image not found
    Just Text
target -> do
      let mediaPath :: Text
mediaPath = Text -> Text
resolveMediaPath Text
target

      -- Load image bytes and add to MediaBag
      case Archive -> Text -> Maybe ByteString
loadMediaFromArchive Archive
archive Text
mediaPath of
        Maybe ByteString
Nothing -> [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return []
        Just ByteString
mediaBytes -> do
          String -> Maybe Text -> ByteString -> m ()
forall (m :: * -> *).
PandocMonad m =>
String -> Maybe Text -> ByteString -> m ()
P.insertMedia (Text -> String
T.unpack Text
mediaPath) Maybe Text
forall a. Maybe a
Nothing ByteString
mediaBytes

          let altText :: [Inline]
altText = if Text -> Bool
T.null Text
alt then [] else [Text -> Inline
Str Text
alt]
          [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return [[Inline] -> Block
Para [Attr -> [Inline] -> (Text, Text) -> Inline
Image Attr
nullAttr [Inline]
altText (Text
mediaPath, Text
title)]]

shapeToBlocks Archive
_archive [(Text, Text)]
_rels (PptxTable [[Text]]
rows) =
  -- Simple table representation for now
  case [[Text]]
rows of
    [] -> [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return []
    ([Text]
headerRow:[[Text]]
bodyRows) -> do
      let makeCell :: Text -> Cell
makeCell Text
text = Attr -> Alignment -> RowSpan -> ColSpan -> [Block] -> Cell
Cell Attr
nullAttr Alignment
AlignDefault (Int -> RowSpan
RowSpan Int
1) (Int -> ColSpan
ColSpan Int
1) [[Inline] -> Block
Plain [Text -> Inline
Str Text
text]]
          headerCells :: [Cell]
headerCells = (Text -> Cell) -> [Text] -> [Cell]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Cell
makeCell [Text]
headerRow
          bodyCells :: [[Cell]]
bodyCells = ([Text] -> [Cell]) -> [[Text]] -> [[Cell]]
forall a b. (a -> b) -> [a] -> [b]
map ((Text -> Cell) -> [Text] -> [Cell]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Cell
makeCell) [[Text]]
bodyRows
          caption :: Caption
caption = Maybe [Inline] -> [Block] -> Caption
Caption Maybe [Inline]
forall a. Maybe a
Nothing []
          colSpec :: [(Alignment, ColWidth)]
colSpec = Int -> (Alignment, ColWidth) -> [(Alignment, ColWidth)]
forall a. Int -> a -> [a]
replicate ([Text] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
headerRow) (Alignment
AlignDefault, ColWidth
ColWidthDefault)
          headerRow' :: Row
headerRow' = Attr -> [Cell] -> Row
Row Attr
nullAttr [Cell]
headerCells
          bodyRows' :: [Row]
bodyRows' = ([Cell] -> Row) -> [[Cell]] -> [Row]
forall a b. (a -> b) -> [a] -> [b]
map (Attr -> [Cell] -> Row
Row Attr
nullAttr) [[Cell]]
bodyCells
          thead :: TableHead
thead = Attr -> [Row] -> TableHead
TableHead Attr
nullAttr [Row
headerRow']
          tbody :: [TableBody]
tbody = [Attr -> RowHeadColumns -> [Row] -> [Row] -> TableBody
TableBody Attr
nullAttr RowHeadColumns
0 [] [Row]
bodyRows']
          tfoot :: TableFoot
tfoot = Attr -> [Row] -> TableFoot
TableFoot Attr
nullAttr []
      [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return [Attr
-> Caption
-> [(Alignment, ColWidth)]
-> TableHead
-> [TableBody]
-> TableFoot
-> Block
Table Attr
nullAttr Caption
caption [(Alignment, ColWidth)]
colSpec TableHead
thead [TableBody]
tbody TableFoot
tfoot]

shapeToBlocks Archive
archive [(Text, Text)]
rels (PptxDiagramRef Text
dataRelId Text
layoutRelId) = do
  -- Parse SmartArt diagram
  case Archive
-> [(Text, Text)] -> Text -> Text -> Either Text PptxDiagram
parseDiagram Archive
archive [(Text, Text)]
rels Text
dataRelId Text
layoutRelId of
    Left Text
err -> do
      -- Failed to parse diagram, return placeholder
      [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return [[Inline] -> Block
Para [Text -> Inline
Str (Text -> Inline) -> Text -> Inline
forall a b. (a -> b) -> a -> b
$ Text
"[Diagram parse error: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
err Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"]"]]
    Right PptxDiagram
diagram ->
      [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Block] -> m [Block]) -> [Block] -> m [Block]
forall a b. (a -> b) -> a -> b
$ PptxDiagram -> [Block]
diagramToBlocks PptxDiagram
diagram
shapeToBlocks Archive
_archive [(Text, Text)]
_rels (PptxGraphic Text
text) =
  -- Placeholder for other graphics (charts, etc.)
  [Block] -> m [Block]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return [[Inline] -> Block
Para [Text -> Inline
Str (Text -> Inline) -> Text -> Inline
forall a b. (a -> b) -> a -> b
$ Text
"[Graphic: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
text Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"]"]]

-- | Resolve media path (handle relative paths)
resolveMediaPath :: Text -> Text
resolveMediaPath :: Text -> Text
resolveMediaPath Text
target =
  if Text
"../media/" Text -> Text -> Bool
`T.isPrefixOf` Text
target
    then Text
"ppt/media/" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text -> Text
T.drop Int
9 Text
target  -- "../media/" = 9 chars
    else if Text
"media/" Text -> Text -> Bool
`T.isPrefixOf` Text
target
      then Text
"ppt/" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
target
      else Text
target

-- | Load media file from archive
loadMediaFromArchive :: Archive -> Text -> Maybe B.ByteString
loadMediaFromArchive :: Archive -> Text -> Maybe ByteString
loadMediaFromArchive Archive
archive Text
path =
  case String -> Archive -> Maybe Entry
findEntryByPath (Text -> String
T.unpack Text
path) Archive
archive of
    Just Entry
entry -> ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ Entry -> ByteString
fromEntry Entry
entry
    Maybe Entry
Nothing -> Maybe ByteString
forall a. Maybe a
Nothing

-- | Parse paragraphs from text box
parseParagraphs :: NameSpaces -> Element -> [PptxParagraph]
parseParagraphs :: NameSpaces -> Element -> [PptxParagraph]
parseParagraphs NameSpaces
ns Element
txBody =
  let pElems :: [Element]
pElems = NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName NameSpaces
ns Text
"a" Text
"p" Element
txBody
   in (Element -> PptxParagraph) -> [Element] -> [PptxParagraph]
forall a b. (a -> b) -> [a] -> [b]
map (NameSpaces -> Element -> PptxParagraph
parseParagraph NameSpaces
ns) [Element]
pElems

-- | Parse individual paragraph
parseParagraph :: NameSpaces -> Element -> PptxParagraph
parseParagraph :: NameSpaces -> Element -> PptxParagraph
parseParagraph NameSpaces
ns Element
pElem =
  let level :: Int
level = NameSpaces -> Element -> Int
parseBulletLevel NameSpaces
ns Element
pElem
      bullet :: BulletType
bullet = NameSpaces -> Element -> BulletType
detectBulletType NameSpaces
ns Element
pElem
      text :: Text
text = NameSpaces -> Element -> Text
extractParagraphText NameSpaces
ns Element
pElem
   in Int -> BulletType -> Text -> PptxParagraph
PptxParagraph Int
level BulletType
bullet Text
text

-- | Parse bullet level from paragraph properties
parseBulletLevel :: NameSpaces -> Element -> Int
parseBulletLevel :: NameSpaces -> Element -> Int
parseBulletLevel NameSpaces
ns Element
pElem =
  case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"pPr" Element
pElem Maybe Element -> (Element -> Maybe Text) -> Maybe Text
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
       QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"lvl") Maybe Text -> (Text -> Maybe Int) -> Maybe Int
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
       (\Text
s -> String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe (Text -> String
T.unpack Text
s) :: Maybe Int) of
    Just Int
lvl -> Int
lvl
    Maybe Int
Nothing -> Int
0  -- Default to level 0

-- | Detect bullet type
detectBulletType :: NameSpaces -> Element -> BulletType
detectBulletType :: NameSpaces -> Element -> BulletType
detectBulletType NameSpaces
ns Element
pElem =
  -- Check for explicit <a:pPr><a:buChar>
  case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"pPr" Element
pElem Maybe Element -> (Element -> Maybe Element) -> Maybe Element
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
       NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"buChar" of
    Just Element
_buCharElem -> BulletType
Bullet
    Maybe Element
Nothing ->
      -- Check for Wingdings symbol (common in PowerPoint)
      if NameSpaces -> Element -> Bool
hasWingdingsSymbol NameSpaces
ns Element
pElem
        then BulletType
WingdingsBullet
        else BulletType
NoBullet

-- | Check if paragraph starts with Wingdings symbol
hasWingdingsSymbol :: NameSpaces -> Element -> Bool
hasWingdingsSymbol :: NameSpaces -> Element -> Bool
hasWingdingsSymbol NameSpaces
ns Element
pElem =
  let runs :: [Element]
runs = NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName NameSpaces
ns Text
"a" Text
"r" Element
pElem
      checkRun :: Element -> Bool
checkRun Element
r = case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"rPr" Element
r Maybe Element -> (Element -> Maybe Element) -> Maybe Element
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                        NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"a" Text
"sym" of
                     Just Element
symElem ->
                       case QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"typeface") Element
symElem of
                         Just Text
typeface -> Text
"Wingdings" Text -> Text -> Bool
`T.isInfixOf` Text
typeface
                         Maybe Text
Nothing -> Bool
False
                     Maybe Element
Nothing -> Bool
False
   in (Element -> Bool) -> [Element] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Element -> Bool
checkRun [Element]
runs

-- | Extract text from paragraph
extractParagraphText :: NameSpaces -> Element -> Text
extractParagraphText :: NameSpaces -> Element -> Text
extractParagraphText NameSpaces
_ns Element
pElem =
  -- Find all <a:t> elements and concatenate
  let textElems :: [Element]
textElems = (QName -> Bool) -> Element -> [Element]
filterElementsName (\QName
qn -> QName -> Text
qName QName
qn Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"t") Element
pElem
      texts :: [Text]
texts = (Element -> Text) -> [Element] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Element -> Text
strContent [Element]
textElems
   in [Text] -> Text
T.unwords ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text]
texts

-- | Extract text from DrawingML element (finds all <a:t> descendants)
extractDrawingMLText :: Element -> Text
extractDrawingMLText :: Element -> Text
extractDrawingMLText Element
el =
  let textElems :: [Element]
textElems = (QName -> Bool) -> Element -> [Element]
filterElementsName (\QName
qn -> QName -> Text
qName QName
qn Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"t") Element
el
      texts :: [Text]
texts = (Element -> Text) -> [Element] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Element -> Text
strContent [Element]
textElems
   in [Text] -> Text
T.unwords ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text]
texts

-- | Convert paragraphs to blocks, grouping bullets into lists
paragraphsToBlocks :: [PptxParagraph] -> [Block]
paragraphsToBlocks :: [PptxParagraph] -> [Block]
paragraphsToBlocks [PptxParagraph]
paras =
  -- If we have multiple paragraphs with bullets, group them
  let hasBullets :: Bool
hasBullets = (PptxParagraph -> Bool) -> [PptxParagraph] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\PptxParagraph
p -> PptxParagraph -> BulletType
paraBullet PptxParagraph
p BulletType -> BulletType -> Bool
forall a. Eq a => a -> a -> Bool
/= BulletType
NoBullet) [PptxParagraph]
paras
   in if Bool
hasBullets
      then [PptxParagraph] -> [Block]
groupBulletParagraphs [PptxParagraph]
paras
      else (PptxParagraph -> Block) -> [PptxParagraph] -> [Block]
forall a b. (a -> b) -> [a] -> [b]
map (\PptxParagraph
p -> [Inline] -> Block
Para [Text -> Inline
Str (Text -> Inline) -> Text -> Inline
forall a b. (a -> b) -> a -> b
$ PptxParagraph -> Text
paraText PptxParagraph
p]) [PptxParagraph]
paras

-- | Group bullet paragraphs into lists
groupBulletParagraphs :: [PptxParagraph] -> [Block]
groupBulletParagraphs :: [PptxParagraph] -> [Block]
groupBulletParagraphs [PptxParagraph]
paras =
  let grouped :: [[PptxParagraph]]
grouped = (PptxParagraph -> PptxParagraph -> Bool)
-> [PptxParagraph] -> [[PptxParagraph]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy PptxParagraph -> PptxParagraph -> Bool
sameBulletLevel [PptxParagraph]
paras
   in ([PptxParagraph] -> [Block]) -> [[PptxParagraph]] -> [Block]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap [PptxParagraph] -> [Block]
groupToBlock [[PptxParagraph]]
grouped
  where
    sameBulletLevel :: PptxParagraph -> PptxParagraph -> Bool
sameBulletLevel PptxParagraph
p1 PptxParagraph
p2 =
      (PptxParagraph -> BulletType
paraBullet PptxParagraph
p1 BulletType -> BulletType -> Bool
forall a. Eq a => a -> a -> Bool
/= BulletType
NoBullet) Bool -> Bool -> Bool
&&
      (PptxParagraph -> BulletType
paraBullet PptxParagraph
p2 BulletType -> BulletType -> Bool
forall a. Eq a => a -> a -> Bool
/= BulletType
NoBullet) Bool -> Bool -> Bool
&&
      (PptxParagraph -> Int
paraLevel PptxParagraph
p1 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== PptxParagraph -> Int
paraLevel PptxParagraph
p2)

    groupToBlock :: [PptxParagraph] -> [Block]
    groupToBlock :: [PptxParagraph] -> [Block]
groupToBlock [] = []
    groupToBlock ps :: [PptxParagraph]
ps@(PptxParagraph
p:[PptxParagraph]
_)
      | PptxParagraph -> BulletType
paraBullet PptxParagraph
p BulletType -> BulletType -> Bool
forall a. Eq a => a -> a -> Bool
/= BulletType
NoBullet =
          -- Bullet list
          let items :: [[Block]]
items = (PptxParagraph -> [Block]) -> [PptxParagraph] -> [[Block]]
forall a b. (a -> b) -> [a] -> [b]
map (\PptxParagraph
para -> [[Inline] -> Block
Plain [Text -> Inline
Str (Text -> Inline) -> Text -> Inline
forall a b. (a -> b) -> a -> b
$ PptxParagraph -> Text
paraText PptxParagraph
para]]) [PptxParagraph]
ps
           in [[[Block]] -> Block
BulletList [[Block]]
items]
      | Bool
otherwise =
          -- Plain paragraph
          (PptxParagraph -> Block) -> [PptxParagraph] -> [Block]
forall a b. (a -> b) -> [a] -> [b]
map (\PptxParagraph
para -> [Inline] -> Block
Para [Text -> Inline
Str (Text -> Inline) -> Text -> Inline
forall a b. (a -> b) -> a -> b
$ PptxParagraph -> Text
paraText PptxParagraph
para]) [PptxParagraph]
ps

-- | Check if shape is title placeholder (also used in Slides module)
isTitlePlaceholder :: NameSpaces -> Element -> Bool
isTitlePlaceholder :: NameSpaces -> Element -> Bool
isTitlePlaceholder NameSpaces
ns Element
el =
  case NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"nvSpPr" Element
el Maybe Element -> (Element -> Maybe Element) -> Maybe Element
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
       NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"nvPr" Maybe Element -> (Element -> Maybe Element) -> Maybe Element
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
       NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"ph" of
    Just Element
phElem ->
      case QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"type") Element
phElem of
        Just Text
phType -> Text
phType Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"title" Bool -> Bool -> Bool
|| Text
phType Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"ctrTitle"
        Maybe Text
Nothing -> Bool
False
    Maybe Element
Nothing -> Bool
False