{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Readers.Xlsx.Parse
( Xlsx(..)
, XlsxWorkbook(..)
, XlsxSheet(..)
, SheetId(..)
, SharedStrings
, Styles(..)
, FontInfo(..)
, archiveToXlsx
) where
import Codec.Archive.Zip (Archive, Entry, findEntryByPath, fromEntry)
import Data.List (find)
import qualified Data.Map.Strict as M
import Data.Maybe (mapMaybe, fromMaybe)
import qualified Data.Text as T
import qualified Data.Text.Lazy.Encoding as TL
import Data.Text (Text)
import qualified Data.Vector as V
import System.FilePath (splitFileName)
import Text.Pandoc.Readers.OOXML.Shared
import Text.Pandoc.Readers.Xlsx.Cells
import Text.Pandoc.XML.Light
import Text.Read (readMaybe)
newtype SheetId = SheetId Int deriving (Int -> SheetId -> ShowS
[SheetId] -> ShowS
SheetId -> [Char]
(Int -> SheetId -> ShowS)
-> (SheetId -> [Char]) -> ([SheetId] -> ShowS) -> Show SheetId
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SheetId -> ShowS
showsPrec :: Int -> SheetId -> ShowS
$cshow :: SheetId -> [Char]
show :: SheetId -> [Char]
$cshowList :: [SheetId] -> ShowS
showList :: [SheetId] -> ShowS
Show, SheetId -> SheetId -> Bool
(SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool) -> Eq SheetId
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SheetId -> SheetId -> Bool
== :: SheetId -> SheetId -> Bool
$c/= :: SheetId -> SheetId -> Bool
/= :: SheetId -> SheetId -> Bool
Eq, Eq SheetId
Eq SheetId =>
(SheetId -> SheetId -> Ordering)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> SheetId)
-> (SheetId -> SheetId -> SheetId)
-> Ord SheetId
SheetId -> SheetId -> Bool
SheetId -> SheetId -> Ordering
SheetId -> SheetId -> SheetId
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: SheetId -> SheetId -> Ordering
compare :: SheetId -> SheetId -> Ordering
$c< :: SheetId -> SheetId -> Bool
< :: SheetId -> SheetId -> Bool
$c<= :: SheetId -> SheetId -> Bool
<= :: SheetId -> SheetId -> Bool
$c> :: SheetId -> SheetId -> Bool
> :: SheetId -> SheetId -> Bool
$c>= :: SheetId -> SheetId -> Bool
>= :: SheetId -> SheetId -> Bool
$cmax :: SheetId -> SheetId -> SheetId
max :: SheetId -> SheetId -> SheetId
$cmin :: SheetId -> SheetId -> SheetId
min :: SheetId -> SheetId -> SheetId
Ord)
type SharedStrings = V.Vector Text
data FontInfo = FontInfo
{ FontInfo -> Bool
fontBold :: Bool
, FontInfo -> Bool
fontItalic :: Bool
, FontInfo -> Bool
fontUnderline :: Bool
} deriving (Int -> FontInfo -> ShowS
[FontInfo] -> ShowS
FontInfo -> [Char]
(Int -> FontInfo -> ShowS)
-> (FontInfo -> [Char]) -> ([FontInfo] -> ShowS) -> Show FontInfo
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> FontInfo -> ShowS
showsPrec :: Int -> FontInfo -> ShowS
$cshow :: FontInfo -> [Char]
show :: FontInfo -> [Char]
$cshowList :: [FontInfo] -> ShowS
showList :: [FontInfo] -> ShowS
Show)
data Styles = Styles
{ Styles -> Vector FontInfo
styleFonts :: V.Vector FontInfo
} deriving (Int -> Styles -> ShowS
[Styles] -> ShowS
Styles -> [Char]
(Int -> Styles -> ShowS)
-> (Styles -> [Char]) -> ([Styles] -> ShowS) -> Show Styles
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Styles -> ShowS
showsPrec :: Int -> Styles -> ShowS
$cshow :: Styles -> [Char]
show :: Styles -> [Char]
$cshowList :: [Styles] -> ShowS
showList :: [Styles] -> ShowS
Show)
data Xlsx = Xlsx
{ Xlsx -> XlsxWorkbook
xlsxWorkbook :: XlsxWorkbook
, Xlsx -> [XlsxSheet]
xlsxSheets :: [XlsxSheet]
, Xlsx -> SharedStrings
xlsxSharedStrings :: SharedStrings
, Xlsx -> Styles
xlsxStyles :: Styles
} deriving (Int -> Xlsx -> ShowS
[Xlsx] -> ShowS
Xlsx -> [Char]
(Int -> Xlsx -> ShowS)
-> (Xlsx -> [Char]) -> ([Xlsx] -> ShowS) -> Show Xlsx
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Xlsx -> ShowS
showsPrec :: Int -> Xlsx -> ShowS
$cshow :: Xlsx -> [Char]
show :: Xlsx -> [Char]
$cshowList :: [Xlsx] -> ShowS
showList :: [Xlsx] -> ShowS
Show)
data XlsxWorkbook = XlsxWorkbook
{ XlsxWorkbook -> [(SheetId, Text, Text)]
workbookSheetNames :: [(SheetId, Text, Text)]
} deriving (Int -> XlsxWorkbook -> ShowS
[XlsxWorkbook] -> ShowS
XlsxWorkbook -> [Char]
(Int -> XlsxWorkbook -> ShowS)
-> (XlsxWorkbook -> [Char])
-> ([XlsxWorkbook] -> ShowS)
-> Show XlsxWorkbook
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> XlsxWorkbook -> ShowS
showsPrec :: Int -> XlsxWorkbook -> ShowS
$cshow :: XlsxWorkbook -> [Char]
show :: XlsxWorkbook -> [Char]
$cshowList :: [XlsxWorkbook] -> ShowS
showList :: [XlsxWorkbook] -> ShowS
Show)
data XlsxSheet = XlsxSheet
{ XlsxSheet -> SheetId
sheetId :: SheetId
, XlsxSheet -> Text
sheetName :: Text
, XlsxSheet -> Map CellRef XlsxCell
sheetCells :: M.Map CellRef XlsxCell
} deriving (Int -> XlsxSheet -> ShowS
[XlsxSheet] -> ShowS
XlsxSheet -> [Char]
(Int -> XlsxSheet -> ShowS)
-> (XlsxSheet -> [Char])
-> ([XlsxSheet] -> ShowS)
-> Show XlsxSheet
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> XlsxSheet -> ShowS
showsPrec :: Int -> XlsxSheet -> ShowS
$cshow :: XlsxSheet -> [Char]
show :: XlsxSheet -> [Char]
$cshowList :: [XlsxSheet] -> ShowS
showList :: [XlsxSheet] -> ShowS
Show)
archiveToXlsx :: Archive -> Either Text Xlsx
archiveToXlsx :: Archive -> Either Text Xlsx
archiveToXlsx Archive
archive = do
[Char]
workbookPath <- Archive -> Either Text [Char]
getWorkbookXmlPath Archive
archive
Element
workbookElem <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
workbookPath
XlsxWorkbook
workbook <- Element -> Either Text XlsxWorkbook
parseWorkbook Element
workbookElem
Either Text XlsxWorkbook -> Text -> Either Text XlsxWorkbook
forall a. Either Text a -> Text -> Either Text a
`addContext` (Text
"Parsing workbook.xml from: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
workbookPath)
[(Text, Text)]
workbookRels <- Archive -> [Char] -> Either Text [(Text, Text)]
loadRelationships Archive
archive (ShowS
relsPathFor [Char]
workbookPath)
SharedStrings
sharedStrings <- case [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget [(Text, Text)]
workbookRels Text
"sharedStrings" of
Just (Text
_, Text
target) -> do
let path :: [Char]
path = [Char]
"xl/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
target
Element
el <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
path
Element -> Either Text SharedStrings
parseSharedStrings Element
el
Maybe (Text, Text)
Nothing -> SharedStrings -> Either Text SharedStrings
forall a b. b -> Either a b
Right SharedStrings
forall a. Vector a
V.empty
Styles
styles <- case [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget [(Text, Text)]
workbookRels Text
"styles" of
Just (Text
_, Text
target) -> do
let path :: [Char]
path = [Char]
"xl/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
target
Element
el <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
path
Element -> Either Text Styles
parseStyles Element
el
Maybe (Text, Text)
Nothing -> Styles -> Either Text Styles
forall a b. b -> Either a b
Right (Styles -> Either Text Styles) -> Styles -> Either Text Styles
forall a b. (a -> b) -> a -> b
$ Vector FontInfo -> Styles
Styles Vector FontInfo
forall a. Vector a
V.empty
[XlsxSheet]
sheets <- ((SheetId, Text, Text) -> Either Text XlsxSheet)
-> [(SheetId, Text, Text)] -> Either Text [XlsxSheet]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (\(SheetId, Text, Text)
sheetInfo -> Archive
-> [(Text, Text)]
-> SharedStrings
-> Styles
-> (SheetId, Text, Text)
-> Either Text XlsxSheet
parseSheet Archive
archive [(Text, Text)]
workbookRels SharedStrings
sharedStrings Styles
styles (SheetId, Text, Text)
sheetInfo)
(XlsxWorkbook -> [(SheetId, Text, Text)]
workbookSheetNames XlsxWorkbook
workbook)
Xlsx -> Either Text Xlsx
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Xlsx -> Either Text Xlsx) -> Xlsx -> Either Text Xlsx
forall a b. (a -> b) -> a -> b
$ XlsxWorkbook -> [XlsxSheet] -> SharedStrings -> Styles -> Xlsx
Xlsx XlsxWorkbook
workbook [XlsxSheet]
sheets SharedStrings
sharedStrings Styles
styles
getWorkbookXmlPath :: Archive -> Either Text FilePath
getWorkbookXmlPath :: Archive -> Either Text [Char]
getWorkbookXmlPath Archive
archive = do
Entry
relsEntry <- Text -> Maybe Entry -> Either Text Entry
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing _rels/.rels" (Maybe Entry -> Either Text Entry)
-> Maybe Entry -> Either Text Entry
forall a b. (a -> b) -> a -> b
$
[Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
"_rels/.rels" Archive
archive
Element
relsElem <- Entry -> Either Text Element
parseXMLFromEntry Entry
relsEntry
let relElems :: [Element]
relElems = [Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
relsElem
case (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Element -> Bool
isOfficeDocRel [Element]
relElems of
Maybe Element
Nothing -> Text -> Either Text [Char]
forall a b. a -> Either a b
Left Text
"No workbook.xml relationship found"
Just Element
rel -> do
Text
target <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing Target" (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
rel
[Char] -> Either Text [Char]
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> Either Text [Char]) -> [Char] -> Either Text [Char]
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
target
where
isOfficeDocRel :: Element -> Bool
isOfficeDocRel Element
el =
case (QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Type") Element
el, QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
el) of
(Just Text
relType, Just Text
target) ->
Text
"officeDocument" Text -> Text -> Bool
`T.isInfixOf` Text
relType Bool -> Bool -> Bool
&& Text
"workbook" Text -> Text -> Bool
`T.isInfixOf` Text
target
(Maybe Text, Maybe Text)
_ -> Bool
False
parseWorkbook :: Element -> Either Text XlsxWorkbook
parseWorkbook :: Element -> Either Text XlsxWorkbook
parseWorkbook Element
wbElem = do
let ns :: NameSpaces
ns = Element -> NameSpaces
elemToNameSpaces Element
wbElem
Element
sheets <- Text -> Maybe Element -> Either Text Element
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing <sheets>" (Maybe Element -> Either Text Element)
-> Maybe Element -> Either Text Element
forall a b. (a -> b) -> a -> b
$
(Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"sheets") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
wbElem)
let sheetElems :: [Element]
sheetElems = (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"sheet") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sheets)
[(SheetId, Text, Text)]
sheetRefs <- ((Int, Element) -> Either Text (SheetId, Text, Text))
-> [(Int, Element)] -> Either Text [(SheetId, Text, Text)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (NameSpaces -> (Int, Element) -> Either Text (SheetId, Text, Text)
parseSheetRef NameSpaces
ns) ([Int] -> [Element] -> [(Int, Element)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1..] [Element]
sheetElems)
XlsxWorkbook -> Either Text XlsxWorkbook
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (XlsxWorkbook -> Either Text XlsxWorkbook)
-> XlsxWorkbook -> Either Text XlsxWorkbook
forall a b. (a -> b) -> a -> b
$ [(SheetId, Text, Text)] -> XlsxWorkbook
XlsxWorkbook [(SheetId, Text, Text)]
sheetRefs
parseSheetRef :: NameSpaces -> (Int, Element) -> Either Text (SheetId, Text, Text)
parseSheetRef :: NameSpaces -> (Int, Element) -> Either Text (SheetId, Text, Text)
parseSheetRef NameSpaces
ns (Int
idx, Element
sheetElem) = do
let name :: Text
name = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe (Text
"Sheet" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack (Int -> [Char]
forall a. Show a => a -> [Char]
show Int
idx)) (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$
QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"name") Element
sheetElem
Text
relId <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing r:id" (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName NameSpaces
ns Text
"r" Text
"id" Element
sheetElem
(SheetId, Text, Text) -> Either Text (SheetId, Text, Text)
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> SheetId
SheetId Int
idx, Text
name, Text
relId)
parseSharedStrings :: Element -> Either Text SharedStrings
parseSharedStrings :: Element -> Either Text SharedStrings
parseSharedStrings Element
sstElem = do
let siElems :: [Element]
siElems = (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"si") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sstElem)
strings :: [Text]
strings = (Element -> Text) -> [Element] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Element -> Text
extractString [Element]
siElems
SharedStrings -> Either Text SharedStrings
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (SharedStrings -> Either Text SharedStrings)
-> SharedStrings -> Either Text SharedStrings
forall a b. (a -> b) -> a -> b
$ [Text] -> SharedStrings
forall a. [a] -> Vector a
V.fromList [Text]
strings
where
extractString :: Element -> Text
extractString Element
siElem =
case (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"t") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
siElem) of
Just Element
tElem -> Element -> Text
strContent Element
tElem
Maybe Element
Nothing -> Element -> Text
getAllText Element
siElem
parseStyles :: Element -> Either Text Styles
parseStyles :: Element -> Either Text Styles
parseStyles Element
stylesElem = do
let fontsElem :: Maybe Element
fontsElem = (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"fonts") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
stylesElem)
fontElems :: [Element]
fontElems = [Element] -> (Element -> [Element]) -> Maybe Element -> [Element]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] (\Element
fe -> (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"font") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fe)) Maybe Element
fontsElem
fonts :: Vector FontInfo
fonts = [FontInfo] -> Vector FontInfo
forall a. [a] -> Vector a
V.fromList ([FontInfo] -> Vector FontInfo) -> [FontInfo] -> Vector FontInfo
forall a b. (a -> b) -> a -> b
$ (Element -> FontInfo) -> [Element] -> [FontInfo]
forall a b. (a -> b) -> [a] -> [b]
map (NameSpaces -> Element -> FontInfo
parseFont NameSpaces
forall a. Monoid a => a
mempty) [Element]
fontElems
Styles -> Either Text Styles
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Styles -> Either Text Styles) -> Styles -> Either Text Styles
forall a b. (a -> b) -> a -> b
$ Vector FontInfo -> Styles
Styles Vector FontInfo
fonts
parseFont :: NameSpaces -> Element -> FontInfo
parseFont :: NameSpaces -> Element -> FontInfo
parseFont NameSpaces
_ns Element
fontElem =
FontInfo
{ fontBold :: Bool
fontBold = (Element -> Bool) -> [Element] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"b") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fontElem)
, fontItalic :: Bool
fontItalic = (Element -> Bool) -> [Element] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"i") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fontElem)
, fontUnderline :: Bool
fontUnderline = (Element -> Bool) -> [Element] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"u") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fontElem)
}
parseSheet :: Archive -> [(Text, Text)] -> SharedStrings -> Styles -> (SheetId, Text, Text) -> Either Text XlsxSheet
parseSheet :: Archive
-> [(Text, Text)]
-> SharedStrings
-> Styles
-> (SheetId, Text, Text)
-> Either Text XlsxSheet
parseSheet Archive
archive [(Text, Text)]
rels SharedStrings
sharedStrings Styles
styles (SheetId
sid, Text
name, Text
relId) = do
Text
target <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Sheet relationship not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
relId) (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
relId [(Text, Text)]
rels
let sheetPath :: [Char]
sheetPath = [Char]
"xl/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
target
Element
sheetElem <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
sheetPath
Map CellRef XlsxCell
cells <- Element
-> SharedStrings -> Styles -> Either Text (Map CellRef XlsxCell)
parseSheetCells Element
sheetElem SharedStrings
sharedStrings Styles
styles
XlsxSheet -> Either Text XlsxSheet
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (XlsxSheet -> Either Text XlsxSheet)
-> XlsxSheet -> Either Text XlsxSheet
forall a b. (a -> b) -> a -> b
$ SheetId -> Text -> Map CellRef XlsxCell -> XlsxSheet
XlsxSheet SheetId
sid Text
name Map CellRef XlsxCell
cells
parseSheetCells :: Element -> SharedStrings -> Styles -> Either Text (M.Map CellRef XlsxCell)
parseSheetCells :: Element
-> SharedStrings -> Styles -> Either Text (Map CellRef XlsxCell)
parseSheetCells Element
sheetElem SharedStrings
sharedStrings Styles
styles = do
case (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"sheetData") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sheetElem) of
Maybe Element
Nothing -> Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell)
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return Map CellRef XlsxCell
forall k a. Map k a
M.empty
Just Element
sheetData -> do
let rowElems :: [Element]
rowElems = (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"row") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sheetData)
cellElems :: [Element]
cellElems = (Element -> [Element]) -> [Element] -> [Element]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Element
r -> (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"c") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
r)) [Element]
rowElems
cells :: [XlsxCell]
cells = (Element -> Maybe XlsxCell) -> [Element] -> [XlsxCell]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (SharedStrings -> Styles -> Element -> Maybe XlsxCell
parseCell SharedStrings
sharedStrings Styles
styles) [Element]
cellElems
Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell)
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell))
-> Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell)
forall a b. (a -> b) -> a -> b
$ [(CellRef, XlsxCell)] -> Map CellRef XlsxCell
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [(XlsxCell -> CellRef
cellRef XlsxCell
c, XlsxCell
c) | XlsxCell
c <- [XlsxCell]
cells]
parseCell :: SharedStrings -> Styles -> Element -> Maybe XlsxCell
parseCell :: SharedStrings -> Styles -> Element -> Maybe XlsxCell
parseCell SharedStrings
sharedStrings Styles
styles Element
cElem = do
Text
refText <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"r") Element
cElem
CellRef
cellRefParsed <- (Text -> Maybe CellRef)
-> (CellRef -> Maybe CellRef)
-> Either Text CellRef
-> Maybe CellRef
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Maybe CellRef -> Text -> Maybe CellRef
forall a b. a -> b -> a
const Maybe CellRef
forall a. Maybe a
Nothing) CellRef -> Maybe CellRef
forall a. a -> Maybe a
Just (Either Text CellRef -> Maybe CellRef)
-> Either Text CellRef -> Maybe CellRef
forall a b. (a -> b) -> a -> b
$ Text -> Either Text CellRef
parseCellRef Text
refText
let cellType :: Text
cellType = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"t") Element
cElem
styleIdx :: Maybe Int
styleIdx = QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"s") Element
cElem Maybe Text -> (Text -> Maybe Int) -> Maybe Int
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Char] -> Maybe Int
forall a. Read a => [Char] -> Maybe a
readMaybe ([Char] -> Maybe Int) -> (Text -> [Char]) -> Text -> Maybe Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Char]
T.unpack
let vElem :: Maybe Element
vElem = (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"v") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
cElem)
vText :: Text
vText = Text -> (Element -> Text) -> Maybe Element -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" Element -> Text
strContent Maybe Element
vElem
let value :: CellValue
value = if Text
cellType Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"s"
then
case [Char] -> Maybe Int
forall a. Read a => [Char] -> Maybe a
readMaybe (Text -> [Char]
T.unpack Text
vText) of
Just Int
idx | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< SharedStrings -> Int
forall a. Vector a -> Int
V.length SharedStrings
sharedStrings ->
Text -> CellValue
TextValue (SharedStrings
sharedStrings SharedStrings -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
idx)
Maybe Int
_ -> CellValue
EmptyValue
else if Text -> Bool
T.null Text
vText
then CellValue
EmptyValue
else
case [Char] -> Maybe Double
forall a. Read a => [Char] -> Maybe a
readMaybe (Text -> [Char]
T.unpack Text
vText) of
Just Double
n -> Double -> CellValue
NumberValue Double
n
Maybe Double
Nothing -> Text -> CellValue
TextValue Text
vText
let (Bool
bold, Bool
italic) = case Maybe Int
styleIdx of
Just Int
idx | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Vector FontInfo -> Int
forall a. Vector a -> Int
V.length (Styles -> Vector FontInfo
styleFonts Styles
styles) ->
let font :: FontInfo
font = Styles -> Vector FontInfo
styleFonts Styles
styles Vector FontInfo -> Int -> FontInfo
forall a. Vector a -> Int -> a
V.! Int
idx
in (FontInfo -> Bool
fontBold FontInfo
font, FontInfo -> Bool
fontItalic FontInfo
font)
Maybe Int
_ -> (Bool
False, Bool
False)
XlsxCell -> Maybe XlsxCell
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (XlsxCell -> Maybe XlsxCell) -> XlsxCell -> Maybe XlsxCell
forall a b. (a -> b) -> a -> b
$ CellRef -> CellValue -> Bool -> Bool -> XlsxCell
XlsxCell CellRef
cellRefParsed CellValue
value Bool
bold Bool
italic
loadXMLFromArchive :: Archive -> FilePath -> Either Text Element
loadXMLFromArchive :: Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
path = do
Entry
entry <- Text -> Maybe Entry -> Either Text Entry
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Entry not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
path) (Maybe Entry -> Either Text Entry)
-> Maybe Entry -> Either Text Entry
forall a b. (a -> b) -> a -> b
$
[Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
path Archive
archive
Entry -> Either Text Element
parseXMLFromEntry Entry
entry
parseXMLFromEntry :: Entry -> Either Text Element
parseXMLFromEntry :: Entry -> Either Text Element
parseXMLFromEntry Entry
entry =
let lazyText :: Text
lazyText = ByteString -> Text
TL.decodeUtf8 (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ Entry -> ByteString
fromEntry Entry
entry
in Text -> Either Text Element
parseXMLElement Text
lazyText
loadRelationships :: Archive -> FilePath -> Either Text [(Text, Text)]
loadRelationships :: Archive -> [Char] -> Either Text [(Text, Text)]
loadRelationships Archive
archive [Char]
relsPath =
case [Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
relsPath Archive
archive of
Maybe Entry
Nothing -> [(Text, Text)] -> Either Text [(Text, Text)]
forall a b. b -> Either a b
Right []
Just Entry
entry -> do
Element
relsElem <- Entry -> Either Text Element
parseXMLFromEntry Entry
entry
let relElems :: [Element]
relElems = [Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
relsElem
[(Text, Text)] -> Either Text [(Text, Text)]
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Text, Text)] -> Either Text [(Text, Text)])
-> [(Text, Text)] -> Either Text [(Text, Text)]
forall a b. (a -> b) -> a -> b
$ (Element -> Maybe (Text, Text)) -> [Element] -> [(Text, Text)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Element -> Maybe (Text, Text)
extractRel [Element]
relElems
where
extractRel :: Element -> Maybe (Text, Text)
extractRel Element
el = do
Text
relId <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Id") Element
el
Text
target <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
el
(Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
relId, Text
target)
relsPathFor :: FilePath -> FilePath
relsPathFor :: ShowS
relsPathFor [Char]
path =
let ([Char]
dir, [Char]
file) = [Char] -> ([Char], [Char])
splitFileName [Char]
path
in [Char]
dir [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"/_rels/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
file [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
".rels"
findRelWithTarget :: [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget :: [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget [(Text, Text)]
rels Text
targetName =
((Text, Text) -> Bool) -> [(Text, Text)] -> Maybe (Text, Text)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\(Text
_, Text
target) -> Text
targetName Text -> Text -> Bool
`T.isInfixOf` Text
target) [(Text, Text)]
rels
maybeToEither :: Text -> Maybe a -> Either Text a
maybeToEither :: forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
err Maybe a
Nothing = Text -> Either Text a
forall a b. a -> Either a b
Left Text
err
maybeToEither Text
_ (Just a
x) = a -> Either Text a
forall a b. b -> Either a b
Right a
x
getAllText :: Element -> Text
getAllText :: Element -> Text
getAllText Element
el =
let textFromContent :: Content -> Text
textFromContent (Text CData
cdata) = CData -> Text
cdData CData
cdata
textFromContent (Elem Element
e) = Element -> Text
getAllText Element
e
textFromContent Content
_ = Text
""
texts :: [Text]
texts = (Content -> Text) -> [Content] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Content -> Text
textFromContent (Element -> [Content]
elContent Element
el)
in [Text] -> Text
T.unwords ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text]
texts
addContext :: Either Text a -> Text -> Either Text a
addContext :: forall a. Either Text a -> Text -> Either Text a
addContext (Right a
x) Text
_ = a -> Either Text a
forall a b. b -> Either a b
Right a
x
addContext (Left Text
err) Text
ctx = Text -> Either Text a
forall a b. a -> Either a b
Left (Text
err Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (context: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
ctx Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
")")