{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.Xlsx.Parse
   Copyright   : © 2025 Anton Antic
   License     : GNU GPL, version 2 or above

   Maintainer  : Anton Antic <anton@everworker.ai>
   Stability   : alpha
   Portability : portable

Parsing of XLSX archive to intermediate representation.
-}
module Text.Pandoc.Readers.Xlsx.Parse
  ( Xlsx(..)
  , XlsxWorkbook(..)
  , XlsxSheet(..)
  , SheetId(..)
  , SharedStrings
  , Styles(..)
  , FontInfo(..)
  , archiveToXlsx
  ) where

import Codec.Archive.Zip (Archive, Entry, findEntryByPath, fromEntry)
import Data.List (find)
import qualified Data.Map.Strict as M
import Data.Maybe (mapMaybe, fromMaybe)
import qualified Data.Text as T
import qualified Data.Text.Lazy.Encoding as TL
import Data.Text (Text)
import qualified Data.Vector as V
import System.FilePath (splitFileName)
import Text.Pandoc.Readers.OOXML.Shared
import Text.Pandoc.Readers.Xlsx.Cells
import Text.Pandoc.XML.Light
import Text.Read (readMaybe)

-- | Sheet identifier
newtype SheetId = SheetId Int deriving (Int -> SheetId -> ShowS
[SheetId] -> ShowS
SheetId -> [Char]
(Int -> SheetId -> ShowS)
-> (SheetId -> [Char]) -> ([SheetId] -> ShowS) -> Show SheetId
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> SheetId -> ShowS
showsPrec :: Int -> SheetId -> ShowS
$cshow :: SheetId -> [Char]
show :: SheetId -> [Char]
$cshowList :: [SheetId] -> ShowS
showList :: [SheetId] -> ShowS
Show, SheetId -> SheetId -> Bool
(SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool) -> Eq SheetId
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: SheetId -> SheetId -> Bool
== :: SheetId -> SheetId -> Bool
$c/= :: SheetId -> SheetId -> Bool
/= :: SheetId -> SheetId -> Bool
Eq, Eq SheetId
Eq SheetId =>
(SheetId -> SheetId -> Ordering)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> Bool)
-> (SheetId -> SheetId -> SheetId)
-> (SheetId -> SheetId -> SheetId)
-> Ord SheetId
SheetId -> SheetId -> Bool
SheetId -> SheetId -> Ordering
SheetId -> SheetId -> SheetId
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: SheetId -> SheetId -> Ordering
compare :: SheetId -> SheetId -> Ordering
$c< :: SheetId -> SheetId -> Bool
< :: SheetId -> SheetId -> Bool
$c<= :: SheetId -> SheetId -> Bool
<= :: SheetId -> SheetId -> Bool
$c> :: SheetId -> SheetId -> Bool
> :: SheetId -> SheetId -> Bool
$c>= :: SheetId -> SheetId -> Bool
>= :: SheetId -> SheetId -> Bool
$cmax :: SheetId -> SheetId -> SheetId
max :: SheetId -> SheetId -> SheetId
$cmin :: SheetId -> SheetId -> SheetId
min :: SheetId -> SheetId -> SheetId
Ord)

-- | Shared strings table (Vector for O(1) lookup)
type SharedStrings = V.Vector Text

-- | Font information
data FontInfo = FontInfo
  { FontInfo -> Bool
fontBold :: Bool
  , FontInfo -> Bool
fontItalic :: Bool
  , FontInfo -> Bool
fontUnderline :: Bool
  } deriving (Int -> FontInfo -> ShowS
[FontInfo] -> ShowS
FontInfo -> [Char]
(Int -> FontInfo -> ShowS)
-> (FontInfo -> [Char]) -> ([FontInfo] -> ShowS) -> Show FontInfo
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> FontInfo -> ShowS
showsPrec :: Int -> FontInfo -> ShowS
$cshow :: FontInfo -> [Char]
show :: FontInfo -> [Char]
$cshowList :: [FontInfo] -> ShowS
showList :: [FontInfo] -> ShowS
Show)

-- | Style information
data Styles = Styles
  { Styles -> Vector FontInfo
styleFonts :: V.Vector FontInfo
  } deriving (Int -> Styles -> ShowS
[Styles] -> ShowS
Styles -> [Char]
(Int -> Styles -> ShowS)
-> (Styles -> [Char]) -> ([Styles] -> ShowS) -> Show Styles
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Styles -> ShowS
showsPrec :: Int -> Styles -> ShowS
$cshow :: Styles -> [Char]
show :: Styles -> [Char]
$cshowList :: [Styles] -> ShowS
showList :: [Styles] -> ShowS
Show)

-- | Complete XLSX document
data Xlsx = Xlsx
  { Xlsx -> XlsxWorkbook
xlsxWorkbook :: XlsxWorkbook
  , Xlsx -> [XlsxSheet]
xlsxSheets :: [XlsxSheet]
  , Xlsx -> SharedStrings
xlsxSharedStrings :: SharedStrings
  , Xlsx -> Styles
xlsxStyles :: Styles
  } deriving (Int -> Xlsx -> ShowS
[Xlsx] -> ShowS
Xlsx -> [Char]
(Int -> Xlsx -> ShowS)
-> (Xlsx -> [Char]) -> ([Xlsx] -> ShowS) -> Show Xlsx
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Xlsx -> ShowS
showsPrec :: Int -> Xlsx -> ShowS
$cshow :: Xlsx -> [Char]
show :: Xlsx -> [Char]
$cshowList :: [Xlsx] -> ShowS
showList :: [Xlsx] -> ShowS
Show)

-- | Workbook information
data XlsxWorkbook = XlsxWorkbook
  { XlsxWorkbook -> [(SheetId, Text, Text)]
workbookSheetNames :: [(SheetId, Text, Text)]  -- (id, name, relId)
  } deriving (Int -> XlsxWorkbook -> ShowS
[XlsxWorkbook] -> ShowS
XlsxWorkbook -> [Char]
(Int -> XlsxWorkbook -> ShowS)
-> (XlsxWorkbook -> [Char])
-> ([XlsxWorkbook] -> ShowS)
-> Show XlsxWorkbook
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> XlsxWorkbook -> ShowS
showsPrec :: Int -> XlsxWorkbook -> ShowS
$cshow :: XlsxWorkbook -> [Char]
show :: XlsxWorkbook -> [Char]
$cshowList :: [XlsxWorkbook] -> ShowS
showList :: [XlsxWorkbook] -> ShowS
Show)

-- | Individual worksheet
data XlsxSheet = XlsxSheet
  { XlsxSheet -> SheetId
sheetId :: SheetId
  , XlsxSheet -> Text
sheetName :: Text
  , XlsxSheet -> Map CellRef XlsxCell
sheetCells :: M.Map CellRef XlsxCell
  } deriving (Int -> XlsxSheet -> ShowS
[XlsxSheet] -> ShowS
XlsxSheet -> [Char]
(Int -> XlsxSheet -> ShowS)
-> (XlsxSheet -> [Char])
-> ([XlsxSheet] -> ShowS)
-> Show XlsxSheet
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> XlsxSheet -> ShowS
showsPrec :: Int -> XlsxSheet -> ShowS
$cshow :: XlsxSheet -> [Char]
show :: XlsxSheet -> [Char]
$cshowList :: [XlsxSheet] -> ShowS
showList :: [XlsxSheet] -> ShowS
Show)

-- | Parse XLSX archive
archiveToXlsx :: Archive -> Either Text Xlsx
archiveToXlsx :: Archive -> Either Text Xlsx
archiveToXlsx Archive
archive = do
  -- Find and parse workbook.xml
  [Char]
workbookPath <- Archive -> Either Text [Char]
getWorkbookXmlPath Archive
archive
  Element
workbookElem <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
workbookPath
  XlsxWorkbook
workbook <- Element -> Either Text XlsxWorkbook
parseWorkbook Element
workbookElem
    Either Text XlsxWorkbook -> Text -> Either Text XlsxWorkbook
forall a. Either Text a -> Text -> Either Text a
`addContext` (Text
"Parsing workbook.xml from: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
workbookPath)

  -- Load workbook relationships
  [(Text, Text)]
workbookRels <- Archive -> [Char] -> Either Text [(Text, Text)]
loadRelationships Archive
archive (ShowS
relsPathFor [Char]
workbookPath)

  -- Parse shared strings (look for sharedStrings relationship)
  SharedStrings
sharedStrings <- case [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget [(Text, Text)]
workbookRels Text
"sharedStrings" of
    Just (Text
_, Text
target) -> do
      let path :: [Char]
path = [Char]
"xl/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
target
      Element
el <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
path
      Element -> Either Text SharedStrings
parseSharedStrings Element
el
    Maybe (Text, Text)
Nothing -> SharedStrings -> Either Text SharedStrings
forall a b. b -> Either a b
Right SharedStrings
forall a. Vector a
V.empty

  -- Parse styles
  Styles
styles <- case [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget [(Text, Text)]
workbookRels Text
"styles" of
    Just (Text
_, Text
target) -> do
      let path :: [Char]
path = [Char]
"xl/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
target
      Element
el <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
path
      Element -> Either Text Styles
parseStyles Element
el
    Maybe (Text, Text)
Nothing -> Styles -> Either Text Styles
forall a b. b -> Either a b
Right (Styles -> Either Text Styles) -> Styles -> Either Text Styles
forall a b. (a -> b) -> a -> b
$ Vector FontInfo -> Styles
Styles Vector FontInfo
forall a. Vector a
V.empty

  -- Parse worksheets
  [XlsxSheet]
sheets <- ((SheetId, Text, Text) -> Either Text XlsxSheet)
-> [(SheetId, Text, Text)] -> Either Text [XlsxSheet]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (\(SheetId, Text, Text)
sheetInfo -> Archive
-> [(Text, Text)]
-> SharedStrings
-> Styles
-> (SheetId, Text, Text)
-> Either Text XlsxSheet
parseSheet Archive
archive [(Text, Text)]
workbookRels SharedStrings
sharedStrings Styles
styles (SheetId, Text, Text)
sheetInfo)
                 (XlsxWorkbook -> [(SheetId, Text, Text)]
workbookSheetNames XlsxWorkbook
workbook)

  Xlsx -> Either Text Xlsx
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Xlsx -> Either Text Xlsx) -> Xlsx -> Either Text Xlsx
forall a b. (a -> b) -> a -> b
$ XlsxWorkbook -> [XlsxSheet] -> SharedStrings -> Styles -> Xlsx
Xlsx XlsxWorkbook
workbook [XlsxSheet]
sheets SharedStrings
sharedStrings Styles
styles

-- | Find workbook.xml via root relationships
getWorkbookXmlPath :: Archive -> Either Text FilePath
getWorkbookXmlPath :: Archive -> Either Text [Char]
getWorkbookXmlPath Archive
archive = do
  Entry
relsEntry <- Text -> Maybe Entry -> Either Text Entry
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing _rels/.rels" (Maybe Entry -> Either Text Entry)
-> Maybe Entry -> Either Text Entry
forall a b. (a -> b) -> a -> b
$
               [Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
"_rels/.rels" Archive
archive
  Element
relsElem <- Entry -> Either Text Element
parseXMLFromEntry Entry
relsEntry

  let relElems :: [Element]
relElems = [Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
relsElem
  case (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find Element -> Bool
isOfficeDocRel [Element]
relElems of
    Maybe Element
Nothing -> Text -> Either Text [Char]
forall a b. a -> Either a b
Left Text
"No workbook.xml relationship found"
    Just Element
rel -> do
      Text
target <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing Target" (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
rel
      [Char] -> Either Text [Char]
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> Either Text [Char]) -> [Char] -> Either Text [Char]
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
target
  where
    isOfficeDocRel :: Element -> Bool
isOfficeDocRel Element
el =
      case (QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Type") Element
el, QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
el) of
        (Just Text
relType, Just Text
target) ->
          Text
"officeDocument" Text -> Text -> Bool
`T.isInfixOf` Text
relType Bool -> Bool -> Bool
&& Text
"workbook" Text -> Text -> Bool
`T.isInfixOf` Text
target
        (Maybe Text, Maybe Text)
_ -> Bool
False

-- | Parse workbook.xml
parseWorkbook :: Element -> Either Text XlsxWorkbook
parseWorkbook :: Element -> Either Text XlsxWorkbook
parseWorkbook Element
wbElem = do
  let ns :: NameSpaces
ns = Element -> NameSpaces
elemToNameSpaces Element
wbElem

  -- Find sheets element (match by local name only)
  Element
sheets <- Text -> Maybe Element -> Either Text Element
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing <sheets>" (Maybe Element -> Either Text Element)
-> Maybe Element -> Either Text Element
forall a b. (a -> b) -> a -> b
$
            (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"sheets") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
wbElem)

  let sheetElems :: [Element]
sheetElems = (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"sheet") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sheets)
  [(SheetId, Text, Text)]
sheetRefs <- ((Int, Element) -> Either Text (SheetId, Text, Text))
-> [(Int, Element)] -> Either Text [(SheetId, Text, Text)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (NameSpaces -> (Int, Element) -> Either Text (SheetId, Text, Text)
parseSheetRef NameSpaces
ns) ([Int] -> [Element] -> [(Int, Element)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1..] [Element]
sheetElems)

  XlsxWorkbook -> Either Text XlsxWorkbook
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (XlsxWorkbook -> Either Text XlsxWorkbook)
-> XlsxWorkbook -> Either Text XlsxWorkbook
forall a b. (a -> b) -> a -> b
$ [(SheetId, Text, Text)] -> XlsxWorkbook
XlsxWorkbook [(SheetId, Text, Text)]
sheetRefs

parseSheetRef :: NameSpaces -> (Int, Element) -> Either Text (SheetId, Text, Text)
parseSheetRef :: NameSpaces -> (Int, Element) -> Either Text (SheetId, Text, Text)
parseSheetRef NameSpaces
ns (Int
idx, Element
sheetElem) = do
  let name :: Text
name = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe (Text
"Sheet" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack (Int -> [Char]
forall a. Show a => a -> [Char]
show Int
idx)) (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$
             QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"name") Element
sheetElem
  Text
relId <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
"Missing r:id" (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
           NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName NameSpaces
ns Text
"r" Text
"id" Element
sheetElem
  (SheetId, Text, Text) -> Either Text (SheetId, Text, Text)
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> SheetId
SheetId Int
idx, Text
name, Text
relId)

-- | Parse shared strings
parseSharedStrings :: Element -> Either Text SharedStrings
parseSharedStrings :: Element -> Either Text SharedStrings
parseSharedStrings Element
sstElem = do
  let siElems :: [Element]
siElems = (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"si") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sstElem)
      strings :: [Text]
strings = (Element -> Text) -> [Element] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Element -> Text
extractString [Element]
siElems
  SharedStrings -> Either Text SharedStrings
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (SharedStrings -> Either Text SharedStrings)
-> SharedStrings -> Either Text SharedStrings
forall a b. (a -> b) -> a -> b
$ [Text] -> SharedStrings
forall a. [a] -> Vector a
V.fromList [Text]
strings
  where
    extractString :: Element -> Text
extractString Element
siElem =
      case (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"t") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
siElem) of
        Just Element
tElem -> Element -> Text
strContent Element
tElem
        Maybe Element
Nothing -> Element -> Text
getAllText Element
siElem

-- | Parse styles (fonts only for MVP)
parseStyles :: Element -> Either Text Styles
parseStyles :: Element -> Either Text Styles
parseStyles Element
stylesElem = do
  -- Parse fonts (match by local name)
  let fontsElem :: Maybe Element
fontsElem = (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"fonts") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
stylesElem)
      fontElems :: [Element]
fontElems = [Element] -> (Element -> [Element]) -> Maybe Element -> [Element]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] (\Element
fe -> (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"font") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fe)) Maybe Element
fontsElem
      fonts :: Vector FontInfo
fonts = [FontInfo] -> Vector FontInfo
forall a. [a] -> Vector a
V.fromList ([FontInfo] -> Vector FontInfo) -> [FontInfo] -> Vector FontInfo
forall a b. (a -> b) -> a -> b
$ (Element -> FontInfo) -> [Element] -> [FontInfo]
forall a b. (a -> b) -> [a] -> [b]
map (NameSpaces -> Element -> FontInfo
parseFont NameSpaces
forall a. Monoid a => a
mempty) [Element]
fontElems

  Styles -> Either Text Styles
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Styles -> Either Text Styles) -> Styles -> Either Text Styles
forall a b. (a -> b) -> a -> b
$ Vector FontInfo -> Styles
Styles Vector FontInfo
fonts

parseFont :: NameSpaces -> Element -> FontInfo
parseFont :: NameSpaces -> Element -> FontInfo
parseFont NameSpaces
_ns Element
fontElem =
  FontInfo
    { fontBold :: Bool
fontBold = (Element -> Bool) -> [Element] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"b") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fontElem)
    , fontItalic :: Bool
fontItalic = (Element -> Bool) -> [Element] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"i") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fontElem)
    , fontUnderline :: Bool
fontUnderline = (Element -> Bool) -> [Element] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"u") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
fontElem)
    }

-- | Parse individual worksheet
parseSheet :: Archive -> [(Text, Text)] -> SharedStrings -> Styles -> (SheetId, Text, Text) -> Either Text XlsxSheet
parseSheet :: Archive
-> [(Text, Text)]
-> SharedStrings
-> Styles
-> (SheetId, Text, Text)
-> Either Text XlsxSheet
parseSheet Archive
archive [(Text, Text)]
rels SharedStrings
sharedStrings Styles
styles (SheetId
sid, Text
name, Text
relId) = do
  Text
target <- Text -> Maybe Text -> Either Text Text
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Sheet relationship not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
relId) (Maybe Text -> Either Text Text) -> Maybe Text -> Either Text Text
forall a b. (a -> b) -> a -> b
$
            Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
relId [(Text, Text)]
rels

  let sheetPath :: [Char]
sheetPath = [Char]
"xl/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Text -> [Char]
T.unpack Text
target
  Element
sheetElem <- Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
sheetPath

  Map CellRef XlsxCell
cells <- Element
-> SharedStrings -> Styles -> Either Text (Map CellRef XlsxCell)
parseSheetCells Element
sheetElem SharedStrings
sharedStrings Styles
styles

  XlsxSheet -> Either Text XlsxSheet
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (XlsxSheet -> Either Text XlsxSheet)
-> XlsxSheet -> Either Text XlsxSheet
forall a b. (a -> b) -> a -> b
$ SheetId -> Text -> Map CellRef XlsxCell -> XlsxSheet
XlsxSheet SheetId
sid Text
name Map CellRef XlsxCell
cells

-- | Parse sheet cells
parseSheetCells :: Element -> SharedStrings -> Styles -> Either Text (M.Map CellRef XlsxCell)
parseSheetCells :: Element
-> SharedStrings -> Styles -> Either Text (Map CellRef XlsxCell)
parseSheetCells Element
sheetElem SharedStrings
sharedStrings Styles
styles = do
  -- Find sheetData by local name
  case (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"sheetData") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sheetElem) of
    Maybe Element
Nothing -> Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell)
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return Map CellRef XlsxCell
forall k a. Map k a
M.empty
    Just Element
sheetData -> do
      let rowElems :: [Element]
rowElems = (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"row") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
sheetData)
          cellElems :: [Element]
cellElems = (Element -> [Element]) -> [Element] -> [Element]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Element
r -> (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"c") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
r)) [Element]
rowElems
          cells :: [XlsxCell]
cells = (Element -> Maybe XlsxCell) -> [Element] -> [XlsxCell]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (SharedStrings -> Styles -> Element -> Maybe XlsxCell
parseCell SharedStrings
sharedStrings Styles
styles) [Element]
cellElems
      Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell)
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell))
-> Map CellRef XlsxCell -> Either Text (Map CellRef XlsxCell)
forall a b. (a -> b) -> a -> b
$ [(CellRef, XlsxCell)] -> Map CellRef XlsxCell
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [(XlsxCell -> CellRef
cellRef XlsxCell
c, XlsxCell
c) | XlsxCell
c <- [XlsxCell]
cells]

-- | Parse individual cell
parseCell :: SharedStrings -> Styles -> Element -> Maybe XlsxCell
parseCell :: SharedStrings -> Styles -> Element -> Maybe XlsxCell
parseCell SharedStrings
sharedStrings Styles
styles Element
cElem = do
  -- Get cell reference
  Text
refText <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"r") Element
cElem
  CellRef
cellRefParsed <- (Text -> Maybe CellRef)
-> (CellRef -> Maybe CellRef)
-> Either Text CellRef
-> Maybe CellRef
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Maybe CellRef -> Text -> Maybe CellRef
forall a b. a -> b -> a
const Maybe CellRef
forall a. Maybe a
Nothing) CellRef -> Maybe CellRef
forall a. a -> Maybe a
Just (Either Text CellRef -> Maybe CellRef)
-> Either Text CellRef -> Maybe CellRef
forall a b. (a -> b) -> a -> b
$ Text -> Either Text CellRef
parseCellRef Text
refText

  -- Get cell type (default to number if missing)
  let cellType :: Text
cellType = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"t") Element
cElem
      styleIdx :: Maybe Int
styleIdx = QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"s") Element
cElem Maybe Text -> (Text -> Maybe Int) -> Maybe Int
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Char] -> Maybe Int
forall a. Read a => [Char] -> Maybe a
readMaybe ([Char] -> Maybe Int) -> (Text -> [Char]) -> Text -> Maybe Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Char]
T.unpack

  -- Get value (match by local name)
  let vElem :: Maybe Element
vElem = (Element -> Bool) -> [Element] -> Maybe Element
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Element
e -> QName -> Text
qName (Element -> QName
elName Element
e) Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"v") ([Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
cElem)
      vText :: Text
vText = Text -> (Element -> Text) -> Maybe Element -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" Element -> Text
strContent Maybe Element
vElem

  -- Parse value based on type
  let value :: CellValue
value = if Text
cellType Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"s"
              then
                -- Shared string
                case [Char] -> Maybe Int
forall a. Read a => [Char] -> Maybe a
readMaybe (Text -> [Char]
T.unpack Text
vText) of
                  Just Int
idx | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< SharedStrings -> Int
forall a. Vector a -> Int
V.length SharedStrings
sharedStrings ->
                    Text -> CellValue
TextValue (SharedStrings
sharedStrings SharedStrings -> Int -> Text
forall a. Vector a -> Int -> a
V.! Int
idx)
                  Maybe Int
_ -> CellValue
EmptyValue
              else if Text -> Bool
T.null Text
vText
                then CellValue
EmptyValue
                else
                  -- Number
                  case [Char] -> Maybe Double
forall a. Read a => [Char] -> Maybe a
readMaybe (Text -> [Char]
T.unpack Text
vText) of
                    Just Double
n -> Double -> CellValue
NumberValue Double
n
                    Maybe Double
Nothing -> Text -> CellValue
TextValue Text
vText

  -- Get formatting from style
  let (Bool
bold, Bool
italic) = case Maybe Int
styleIdx of
        Just Int
idx | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 Bool -> Bool -> Bool
&& Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Vector FontInfo -> Int
forall a. Vector a -> Int
V.length (Styles -> Vector FontInfo
styleFonts Styles
styles) ->
          let font :: FontInfo
font = Styles -> Vector FontInfo
styleFonts Styles
styles Vector FontInfo -> Int -> FontInfo
forall a. Vector a -> Int -> a
V.! Int
idx
           in (FontInfo -> Bool
fontBold FontInfo
font, FontInfo -> Bool
fontItalic FontInfo
font)
        Maybe Int
_ -> (Bool
False, Bool
False)

  XlsxCell -> Maybe XlsxCell
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (XlsxCell -> Maybe XlsxCell) -> XlsxCell -> Maybe XlsxCell
forall a b. (a -> b) -> a -> b
$ CellRef -> CellValue -> Bool -> Bool -> XlsxCell
XlsxCell CellRef
cellRefParsed CellValue
value Bool
bold Bool
italic

-- Helper functions
loadXMLFromArchive :: Archive -> FilePath -> Either Text Element
loadXMLFromArchive :: Archive -> [Char] -> Either Text Element
loadXMLFromArchive Archive
archive [Char]
path = do
  Entry
entry <- Text -> Maybe Entry -> Either Text Entry
forall a. Text -> Maybe a -> Either Text a
maybeToEither (Text
"Entry not found: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
path) (Maybe Entry -> Either Text Entry)
-> Maybe Entry -> Either Text Entry
forall a b. (a -> b) -> a -> b
$
           [Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
path Archive
archive
  Entry -> Either Text Element
parseXMLFromEntry Entry
entry

parseXMLFromEntry :: Entry -> Either Text Element
parseXMLFromEntry :: Entry -> Either Text Element
parseXMLFromEntry Entry
entry =
  let lazyText :: Text
lazyText = ByteString -> Text
TL.decodeUtf8 (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ Entry -> ByteString
fromEntry Entry
entry
   in Text -> Either Text Element
parseXMLElement Text
lazyText

loadRelationships :: Archive -> FilePath -> Either Text [(Text, Text)]
loadRelationships :: Archive -> [Char] -> Either Text [(Text, Text)]
loadRelationships Archive
archive [Char]
relsPath =
  case [Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
relsPath Archive
archive of
    Maybe Entry
Nothing -> [(Text, Text)] -> Either Text [(Text, Text)]
forall a b. b -> Either a b
Right []
    Just Entry
entry -> do
      Element
relsElem <- Entry -> Either Text Element
parseXMLFromEntry Entry
entry
      let relElems :: [Element]
relElems = [Content] -> [Element]
onlyElems ([Content] -> [Element]) -> [Content] -> [Element]
forall a b. (a -> b) -> a -> b
$ Element -> [Content]
elContent Element
relsElem
      [(Text, Text)] -> Either Text [(Text, Text)]
forall a. a -> Either Text a
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Text, Text)] -> Either Text [(Text, Text)])
-> [(Text, Text)] -> Either Text [(Text, Text)]
forall a b. (a -> b) -> a -> b
$ (Element -> Maybe (Text, Text)) -> [Element] -> [(Text, Text)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Element -> Maybe (Text, Text)
extractRel [Element]
relElems
  where
    extractRel :: Element -> Maybe (Text, Text)
extractRel Element
el = do
      Text
relId <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Id") Element
el
      Text
target <- QName -> Element -> Maybe Text
findAttr (Text -> QName
unqual Text
"Target") Element
el
      (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
relId, Text
target)

relsPathFor :: FilePath -> FilePath
relsPathFor :: ShowS
relsPathFor [Char]
path =
  let ([Char]
dir, [Char]
file) = [Char] -> ([Char], [Char])
splitFileName [Char]
path
   in [Char]
dir [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
"/_rels/" [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
file [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
".rels"

findRelWithTarget :: [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget :: [(Text, Text)] -> Text -> Maybe (Text, Text)
findRelWithTarget [(Text, Text)]
rels Text
targetName =
  ((Text, Text) -> Bool) -> [(Text, Text)] -> Maybe (Text, Text)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\(Text
_, Text
target) -> Text
targetName Text -> Text -> Bool
`T.isInfixOf` Text
target) [(Text, Text)]
rels

maybeToEither :: Text -> Maybe a -> Either Text a
maybeToEither :: forall a. Text -> Maybe a -> Either Text a
maybeToEither Text
err Maybe a
Nothing = Text -> Either Text a
forall a b. a -> Either a b
Left Text
err
maybeToEither Text
_ (Just a
x) = a -> Either Text a
forall a b. b -> Either a b
Right a
x

getAllText :: Element -> Text
getAllText :: Element -> Text
getAllText Element
el =
  let textFromContent :: Content -> Text
textFromContent (Text CData
cdata) = CData -> Text
cdData CData
cdata
      textFromContent (Elem Element
e) = Element -> Text
getAllText Element
e
      textFromContent Content
_ = Text
""
      texts :: [Text]
texts = (Content -> Text) -> [Content] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map Content -> Text
textFromContent (Element -> [Content]
elContent Element
el)
   in [Text] -> Text
T.unwords ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text]
texts

addContext :: Either Text a -> Text -> Either Text a
addContext :: forall a. Either Text a -> Text -> Either Text a
addContext (Right a
x) Text
_ = a -> Either Text a
forall a b. b -> Either a b
Right a
x
addContext (Left Text
err) Text
ctx = Text -> Either Text a
forall a b. a -> Either a b
Left (Text
err Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" (context: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
ctx Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
")")