{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.Pptx.Parse
( Pptx(..)
, PresentationDoc(..)
, PptxSlide(..)
, SlideId(..)
, archiveToPptx
) where
import Codec.Archive.Zip (Archive, Entry, findEntryByPath, fromEntry)
import qualified Data.ByteString.Lazy as B
import Data.List (find)
import Data.Maybe (mapMaybe)
import qualified Data.Text as T
import qualified Data.Text.Lazy.Encoding as TL
import Data.Text (Text)
import System.FilePath (splitFileName)
import Text.Pandoc.Readers.OOXML.Shared
import Text.Pandoc.XML.Light
import Text.Read (readMaybe)
newtype SlideId = SlideId Int deriving (Int -> SlideId -> ShowS
[SlideId] -> ShowS
SlideId -> String
(Int -> SlideId -> ShowS)
-> (SlideId -> String) -> ([SlideId] -> ShowS) -> Show SlideId
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SlideId -> ShowS
showsPrec :: Int -> SlideId -> ShowS
$cshow :: SlideId -> String
show :: SlideId -> String
$cshowList :: [SlideId] -> ShowS
showList :: [SlideId] -> ShowS
Show, SlideId -> SlideId -> Bool
(SlideId -> SlideId -> Bool)
-> (SlideId -> SlideId -> Bool) -> Eq SlideId
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SlideId -> SlideId -> Bool
== :: SlideId -> SlideId -> Bool
$c/= :: SlideId -> SlideId -> Bool
/= :: SlideId -> SlideId -> Bool
Eq, Eq SlideId
Eq SlideId =>
(SlideId -> SlideId -> Ordering)
-> (SlideId -> SlideId -> Bool)
-> (SlideId -> SlideId -> Bool)
-> (SlideId -> SlideId -> Bool)
-> (SlideId -> SlideId -> Bool)
-> (SlideId -> SlideId -> SlideId)
-> (SlideId -> SlideId -> SlideId)
-> Ord SlideId
SlideId -> SlideId -> Bool
SlideId -> SlideId -> Ordering
SlideId -> SlideId -> SlideId
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: SlideId -> SlideId -> Ordering
compare :: SlideId -> SlideId -> Ordering
$c< :: SlideId -> SlideId -> Bool
< :: SlideId -> SlideId -> Bool
$c<= :: SlideId -> SlideId -> Bool
<= :: SlideId -> SlideId -> Bool
$c> :: SlideId -> SlideId -> Bool
> :: SlideId -> SlideId -> Bool
$c>= :: SlideId -> SlideId -> Bool
>= :: SlideId -> SlideId -> Bool
$cmax :: SlideId -> SlideId -> SlideId
max :: SlideId -> SlideId -> SlideId
$cmin :: SlideId -> SlideId -> SlideId
min :: SlideId -> SlideId -> SlideId
Ord)
data Pptx = Pptx
{ Pptx -> PresentationDoc
pptxPresentation :: PresentationDoc
, Pptx -> [PptxSlide]
pptxSlides :: [PptxSlide]
, Pptx -> Archive
pptxArchive :: Archive
} deriving (Int -> Pptx -> ShowS
[Pptx] -> ShowS
Pptx -> String
(Int -> Pptx -> ShowS)
-> (Pptx -> String) -> ([Pptx] -> ShowS) -> Show Pptx
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Pptx -> ShowS
showsPrec :: Int -> Pptx -> ShowS
$cshow :: Pptx -> String
show :: Pptx -> String
$cshowList :: [Pptx] -> ShowS
showList :: [Pptx] -> ShowS
Show)
data PptxSlide = PptxSlide
{ PptxSlide -> SlideId
slideId :: SlideId
, PptxSlide -> String
slidePath :: FilePath
, PptxSlide -> Element
slideElement :: Element
, PptxSlide -> [(Text, Text)]
slideRels :: [(Text, Text)]
} deriving (Int -> PptxSlide -> ShowS
[PptxSlide] -> ShowS
PptxSlide -> String
(Int -> PptxSlide -> ShowS)
-> (PptxSlide -> String)
-> ([PptxSlide] -> ShowS)
-> Show PptxSlide
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PptxSlide -> ShowS
showsPrec :: Int -> PptxSlide -> ShowS
$cshow :: PptxSlide -> String
show :: PptxSlide -> String
$cshowList :: [PptxSlide] -> ShowS
showList :: [PptxSlide] -> ShowS
Show)
data PresentationDoc = PresentationDoc
{ PresentationDoc -> NameSpaces
presNameSpaces :: NameSpaces
, PresentationDoc -> (Integer, Integer)
presSlideSize :: (Integer, Integer)
, PresentationDoc -> [(SlideId, Text)]
presSlideIds :: [(SlideId, Text)]
} deriving (Int -> PresentationDoc -> ShowS
[PresentationDoc] -> ShowS
PresentationDoc -> String
(Int -> PresentationDoc -> ShowS)
-> (PresentationDoc -> String)
-> ([PresentationDoc] -> ShowS)
-> Show PresentationDoc
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> PresentationDoc -> ShowS
showsPrec :: Int -> PresentationDoc -> ShowS
$cshow :: PresentationDoc -> String
show :: PresentationDoc -> String
$cshowList :: [PresentationDoc] -> ShowS
showList :: [PresentationDoc] -> ShowS
Show)
archiveToPptx :: Archive -> Either Text Pptx
archiveToPptx :: Archive -> Either Text Pptx
archiveToPptx Archive
archive = do
String
presPath <- Archive -> Either Text String
getPresentationXmlPath Archive
archive
Element
presElem <- Archive -> String -> Either Text Element
loadXMLFromArchive Archive
archive String
presPath
PresentationDoc
presDoc <- Element -> Either Text PresentationDoc
elemToPresentation Element
presElem
String
presRelsPath <- Archive -> String -> Either Text String
getPresentationRelsPath Archive
archive String
presPath
[(Text, Text)]
presRels <- Archive -> String -> Either Text [(Text, Text)]
loadRelationships Archive
archive String
presRelsPath
[PptxSlide]
slides <- ((SlideId, Text) -> Either Text PptxSlide)
-> [(SlideId, Text)] -> Either Text [PptxSlide]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (Archive
-> [(Text, Text)] -> (SlideId, Text) -> Either Text PptxSlide
parseSlide Archive
archive [(Text, Text)]
presRels) (PresentationDoc -> [(SlideId, Text)]
presSlideIds PresentationDoc
presDoc)
Pptx -> Either Text Pptx
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pptx -> Either Text Pptx) -> Pptx -> Either Text Pptx
forall a b. (a -> b) -> a -> b
$ PresentationDoc -> [PptxSlide] -> Archive -> Pptx
Pptx PresentationDoc
presDoc [PptxSlide]
slides Archive
archive
getPresentationXmlPath :: Archive -> Either Text FilePath
getPresentationXmlPath :: Archive -> Either Text String
getPresentationXmlPath Archive
archive = do
Entry
relsEntry <- Text -> Maybe Entry -> Either Text Entry
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing _rels/.rels" (Maybe Entry -> Either Text Entry)
-> Maybe Entry -> Either Text Entry
forall a b. (a -> b) -> a -> b
$
String -> Archive -> Maybe Entry
findEntryByPath String
"_rels/.rels" Archive
archive
Element
relsElem <- Entry -> Either Text Element
parseXMLFromEntry Entry
relsEntry
let relElems :: [Element]
relElems = [Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
relsElem
case (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Element -> Bool
isOfficeDocRel [Element]
relElems of
Maybe Element
Nothing -> Text -> Either Text String
forall a b. a -> Either a b
Left (Text -> Either Text String) -> Text -> Either Text String
forall a b. (a -> b) -> a -> b
$ Text
"No presentation.xml relationship found. Found " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>
String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show ([Element] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Element]
relElems)) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" relationships."
Just Element
rel -> do
Text
target <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing Target attribute" (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
rel
String -> Either Text String
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Either Text String) -> String -> Either Text String
forall a b. (a -> b) -> a -> b
$ Text -> String
T.unpack Text
target
where
isOfficeDocRel :: Element -> Bool
isOfficeDocRel Element
el =
case QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Type") Element
el of
Just Text
relType -> Text
"/officeDocument" Text -> Text -> Bool
`T.isSuffixOf` Text
relType
Maybe Text
Nothing -> Bool
False
loadXMLFromArchive :: Archive -> FilePath -> Either Text Element
loadXMLFromArchive :: Archive -> String -> Either Text Element
loadXMLFromArchive Archive
archive String
path = do
Entry
entry <- Text -> Maybe Entry -> Either Text Entry
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Entry not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack String
path) (Maybe Entry -> Either Text Entry)
-> Maybe Entry -> Either Text Entry
forall a b. (a -> b) -> a -> b
$
String -> Archive -> Maybe Entry
findEntryByPath String
path Archive
archive
let xmlBytes :: ByteString
xmlBytes = Entry -> ByteString
fromEntry Entry
entry
ByteString -> Either Text Element
parseXMLFromBS ByteString
xmlBytes
parseXMLFromBS :: B.ByteString -> Either Text Element
parseXMLFromBS :: ByteString -> Either Text Element
parseXMLFromBS = Text -> Either Text Element
parseXMLElement (Text -> Either Text Element)
-> (ByteString -> Text) -> ByteString -> Either Text Element
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
TL.decodeUtf8
parseXMLFromEntry :: Entry -> Either Text Element
parseXMLFromEntry :: Entry -> Either Text Element
parseXMLFromEntry = ByteString -> Either Text Element
parseXMLFromBS (ByteString -> Either Text Element)
-> (Entry -> ByteString) -> Entry -> Either Text Element
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry -> ByteString
fromEntry
elemToPresentation :: Element -> Either Text PresentationDoc
elemToPresentation :: Element -> Either Text PresentationDoc
elemToPresentation Element
presElem = do
let ns :: NameSpaces
ns = Element -> NameSpaces
elemToNameSpaces Element
presElem
let sizeElem :: Maybe Element
sizeElem = NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"sldSz" Element
presElem
(Integer
widthEMU, Integer
heightEMU) = case Maybe Element
sizeElem of
Just Element
el ->
let cx :: Integer
cx = Text -> Element -> Integer
readAttrInt Text
"cx" Element
el
cy :: Integer
cy = Text -> Element -> Integer
readAttrInt Text
"cy" Element
el
in (Integer
cx, Integer
cy)
Maybe Element
Nothing -> (Integer
9144000, Integer
6858000)
let width :: Integer
width = Integer
widthEMU Integer -> Integer -> Integer
forall a. Integral a => a -> a -> a
`div` Integer
emusPerInch
height :: Integer
height = Integer
heightEMU Integer -> Integer -> Integer
forall a. Integral a => a -> a -> a
`div` Integer
emusPerInch
let sldIdLstElem :: Maybe Element
sldIdLstElem = NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName NameSpaces
ns Text
"p" Text
"sldIdLst" Element
presElem
[(SlideId, Text)]
slideRefs <- case Maybe Element
sldIdLstElem of
Maybe Element
Nothing -> [(SlideId, Text)] -> Either Text [(SlideId, Text)]
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return []
Just Element
el -> do
let sldIdElems :: [Element]
sldIdElems = QName -> Element -> [Element]
findChildren (NameSpaces -> Text -> Text -> QName
elemName NameSpaces
ns Text
"p" Text
"sldId") Element
el
((Int, Element) -> Either Text (SlideId, Text))
-> [(Int, Element)] -> Either Text [(SlideId, Text)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (NameSpaces -> (Int, Element) -> Either Text (SlideId, Text)
extractSlideRef NameSpaces
ns) ([Int] -> [Element] -> [(Int, Element)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1..] [Element]
sldIdElems)
PresentationDoc -> Either Text PresentationDoc
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (PresentationDoc -> Either Text PresentationDoc)
-> PresentationDoc -> Either Text PresentationDoc
forall a b. (a -> b) -> a -> b
$ PresentationDoc
{ presNameSpaces :: NameSpaces
presNameSpaces = NameSpaces
ns
, presSlideSize :: (Integer, Integer)
presSlideSize = (Integer
width, Integer
height)
, presSlideIds :: [(SlideId, Text)]
presSlideIds = [(SlideId, Text)]
slideRefs
}
extractSlideRef :: NameSpaces -> (Int, Element) -> Either Text (SlideId, Text)
NameSpaces
ns (Int
idx, Element
sldIdElem) = do
Text
relId <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Missing r:id in slide " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack (Int -> String
forall a. Show a => a -> String
show Int
idx)) (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName NameSpaces
ns Text
"r" Text
"id" Element
sldIdElem
(SlideId, Text) -> Either Text (SlideId, Text)
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> SlideId
SlideId Int
idx, Text
relId)
readAttrInt :: Text -> Element -> Integer
readAttrInt :: Text -> Element -> Integer
readAttrInt Text
attrName Element
el =
case QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
attrName) Element
el of
Just Text
str -> case String -> Maybe Integer
forall a. Read a => String -> Maybe a
readMaybe (Text -> String
T.unpack Text
str) of
Just Integer
n -> Integer
n
Maybe Integer
Nothing -> Integer
0
Maybe Text
Nothing -> Integer
0
getPresentationRelsPath :: Archive -> FilePath -> Either Text FilePath
getPresentationRelsPath :: Archive -> String -> Either Text String
getPresentationRelsPath Archive
_archive String
presPath =
let (String
dir, String
file) = String -> (String, String)
splitFileName String
presPath
relsPath :: String
relsPath = String
dir String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"/_rels/" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
file String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
".rels"
in String -> Either Text String
forall a b. b -> Either a b
Right String
relsPath
loadRelationships :: Archive -> FilePath -> Either Text [(Text, Text)]
loadRelationships :: Archive -> String -> Either Text [(Text, Text)]
loadRelationships Archive
archive String
relsPath =
case String -> Archive -> Maybe Entry
findEntryByPath String
relsPath Archive
archive of
Maybe Entry
Nothing -> [(Text, Text)] -> Either Text [(Text, Text)]
forall a b. b -> Either a b
Right []
Just Entry
entry -> do
Element
relsElem <- Entry -> Either Text Element
parseXMLFromEntry Entry
entry
let relElems :: [Element]
relElems = [Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
relsElem
[(Text, Text)] -> Either Text [(Text, Text)]
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Text, Text)] -> Either Text [(Text, Text)])
-> [(Text, Text)] -> Either Text [(Text, Text)]
forall a b. (a -> b) -> a -> b
$ (Element -> Maybe (Text, Text)) -> [Element] -> [(Text, Text)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Element -> Maybe (Text, Text)
extractRelationship [Element]
relElems
where
extractRelationship :: Element -> Maybe (Text, Text)
extractRelationship Element
el = do
Text
relId <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Id") Element
el
Text
target <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
el
(Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
relId, Text
target)
parseSlide :: Archive -> [(Text, Text)] -> (SlideId, Text) -> Either Text PptxSlide
parseSlide :: Archive
-> [(Text, Text)] -> (SlideId, Text) -> Either Text PptxSlide
parseSlide Archive
archive [(Text, Text)]
rels (SlideId
sid, Text
relId) = do
Text
target <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Relationship not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
relId) (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
relId [(Text, Text)]
rels
let slidePath' :: String
slidePath' = String
"ppt/" String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Text -> String
T.unpack Text
target
Element
slideElem <- Archive -> String -> Either Text Element
loadXMLFromArchive Archive
archive String
slidePath'
String
slideRelsPath <- Archive -> String -> Either Text String
getPresentationRelsPath Archive
archive String
slidePath'
[(Text, Text)]
slideRels' <- Archive -> String -> Either Text [(Text, Text)]
loadRelationships Archive
archive String
slideRelsPath
PptxSlide -> Either Text PptxSlide
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (PptxSlide -> Either Text PptxSlide)
-> PptxSlide -> Either Text PptxSlide
forall a b. (a -> b) -> a -> b
$ SlideId -> String -> Element -> [(Text, Text)] -> PptxSlide
PptxSlide SlideId
sid String
slidePath' Element
slideElem [(Text, Text)]
slideRels'
maybeToEither :: Text -> Maybe a -> Either Text a
maybeToEither :: forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
err Maybe a
Nothing = Text -> Either Text a
forall a b. a -> Either a b
Left Text
err
maybeToEither Text
_ (Just a
x) = a -> Either Text a
forall a b. b -> Either a b
Right a
x