{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.Xlsx.Sheets
   Copyright   : © 2025 Anton Antic
   License     : GNU GPL, version 2 or above

   Maintainer  : Anton Antic <anton@everworker.ai>
   Stability   : alpha
   Portability : portable

Conversion of XLSX sheets to Pandoc AST.
-}
module Text.Pandoc.Readers.Xlsx.Sheets
  ( xlsxToOutput
  ) where

import qualified Data.Map.Strict as M
import qualified Data.Text as T
import Data.List (sort, dropWhileEnd)
import Data.Char (isSpace)
import Text.Pandoc.Definition
import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Readers.Xlsx.Parse
import Text.Pandoc.Readers.Xlsx.Cells
import qualified Text.Pandoc.Builder as B

-- | Turn a parsed workbook into Pandoc metadata and blocks.
--
-- The metadata is always empty. The blocks are the results of
-- 'sheetToBlocks' for each sheet returned by 'xlsxSheets', concatenated
-- in that order. The reader options are currently ignored.
xlsxToOutput :: ReaderOptions -> Xlsx -> (Meta, [Block])
xlsxToOutput _opts xlsx = (mempty, foldMap sheetToBlocks (xlsxSheets xlsx))

-- | Render a single sheet as a level-2 heading followed by its table.
--
-- The heading text is the sheet name and its identifier is @sheet-N@,
-- where @N@ is the number wrapped by the sheet's 'SheetId'. When
-- 'cellsToTable' returns 'Nothing' (the sheet has no cells) only the
-- heading is produced.
sheetToBlocks :: XlsxSheet -> [Block]
sheetToBlocks sheet = heading : maybe [] (: []) (cellsToTable sheet)
  where
    SheetId sheetNum = sheetId sheet
    anchor = "sheet-" <> T.pack (show sheetNum)
    heading = Header 2 (anchor, [], []) (B.toList (B.text (sheetName sheet)))

-- | Build a Pandoc 'Table' from the cells of a sheet.
--
-- Returns 'Nothing' when the sheet has no cells. Otherwise the bounding
-- box of all populated cell references is expanded into a dense grid
-- (positions without a cell become empty table cells), the first grid
-- row becomes the table head (a simple heuristic), and trailing body
-- rows for which 'isEmptyRow' holds are dropped. The column count is
-- taken from the head row; the caption and foot are left empty.
cellsToTable :: XlsxSheet -> Maybe Block
cellsToTable sheet
  | M.null cellMap = Nothing
  | otherwise =
      Just $
        Table nullAttr (Caption Nothing []) colSpecs tableHead [tableBody] tableFoot
  where
    cellMap = sheetCells sheet

    -- Bounding box of the populated references. These bindings are only
    -- forced in the non-empty branch, so 'minimum'/'maximum' never see [].
    populated = sort (M.keys cellMap)
    colIxs = map cellRefCol populated
    rowIxs = map cellRefRow populated
    (firstCol, lastCol) = (minimum colIxs, maximum colIxs)
    (firstRow, lastRow) = (minimum rowIxs, maximum rowIxs)

    -- Dense grid: one list per row, one entry per column of the box.
    gridRow r = [M.lookup (CellRef c r) cellMap | c <- [firstCol .. lastCol]]
    denseGrid = map gridRow [firstRow .. lastRow]

    -- The first grid row is treated as the header.
    (headCells, restRows) = case denseGrid of
      [] -> ([], [])
      firstLine : others -> (firstLine, others)

    -- Drop trailing rows that hold nothing but blanks.
    keptRows = dropWhileEnd isEmptyRow restRows

    -- A missing cell and a present cell differ only in their inlines.
    toCell mcell =
      Cell nullAttr AlignDefault (RowSpan 1) (ColSpan 1)
        [Plain (maybe [] cellToInlines mcell)]
    toRow = Row nullAttr . map toCell

    colSpecs = replicate (length headCells) (AlignDefault, ColWidthDefault)
    tableHead = TableHead nullAttr [toRow headCells]
    tableBody = TableBody nullAttr 0 [] (map toRow keptRows)
    tableFoot = TableFoot nullAttr []

-- | Decide whether a grid row is blank.
--
-- A row is blank when every position is either missing ('Nothing'), holds
-- an 'EmptyValue', or holds a 'TextValue' made up solely of whitespace
-- characters. Any 'NumberValue' makes the row non-blank.
isEmptyRow :: [Maybe XlsxCell] -> Bool
isEmptyRow = all (maybe True (isBlank . cellValue))
  where
    isBlank EmptyValue = True
    isBlank (TextValue txt) = T.all isSpace txt
    isBlank (NumberValue _) = False

-- | Convert a cell's value and basic formatting to Pandoc inlines.
--
-- Text values are split into inlines with 'B.text', numbers are rendered
-- with 'show', and empty values yield no inlines. If the cell is bold the
-- content is wrapped in 'Strong'; if it is italic the result is wrapped in
-- 'Emph' (so a bold italic cell becomes @Emph [Strong ...]@).
--
-- Formatting is only applied to non-empty content: a styled cell without
-- any inlines returns @[]@ rather than an empty 'Strong'/'Emph' node.
cellToInlines :: XlsxCell -> [Inline]
cellToInlines cell = wrapIf cellItalic Emph (wrapIf cellBold Strong content)
  where
    content = case cellValue cell of
      TextValue t -> B.toList (B.text t)
      -- NOTE(review): 'show' on a 'Double' always prints a fractional part
      -- or an exponent (e.g. @42.0@, @1.0e-2@); confirm this is the
      -- intended rendering for numeric cells.
      NumberValue n -> [Str (T.pack (show n))]
      EmptyValue -> []

    -- Wrap the inlines in the given constructor when the cell carries the
    -- style, but never wrap an empty list.
    wrapIf hasStyle ctor inls
      | null inls = inls
      | hasStyle cell = [ctor inls]
      | otherwise = inls