module Nbparts.Util.Markdown where

import Commonmark qualified
import Control.Monad.Identity (runIdentity)
import Data.Data (Data)
import Data.Data qualified as Data
import Data.Function ((&))
import Data.List.NonEmpty qualified as NonEmptyList
import Data.Maybe qualified as Maybe
import Data.Sequence (Seq ((:|>)))
import Data.Sequence qualified as Sequence
import Data.Text (Text)
import Data.Text qualified as Text
import Nbparts.Types.Sources.Markdown
  ( Block (Block),
    BlockType (RawBlock, ReferenceLinkDefinition),
    Blocks,
    Inline (Inline),
    InlineType (Image, RawInline),
    Inlines (Inlines),
  )
import Nbparts.Util.Text qualified as TextUtil

-- TODO: Revisit syntax spec
parseMarkdown :: Text -> Either Commonmark.ParseError Blocks
parseMarkdown :: Text -> Either ParseError Blocks
parseMarkdown Text
mdText =
  Identity (Either ParseError Blocks) -> Either ParseError Blocks
forall a. Identity a -> a
runIdentity (Identity (Either ParseError Blocks) -> Either ParseError Blocks)
-> Identity (Either ParseError Blocks) -> Either ParseError Blocks
forall a b. (a -> b) -> a -> b
$
    Text
mdText
      Text -> (Text -> [Tok]) -> [Tok]
forall a b. a -> (a -> b) -> b
& String -> Text -> [Tok]
Commonmark.tokenize String
""
      [Tok]
-> ([Tok] -> Identity (Either ParseError Blocks))
-> Identity (Either ParseError Blocks)
forall a b. a -> (a -> b) -> b
& SyntaxSpec Identity Inlines Blocks
-> [Tok] -> Identity (Either ParseError Blocks)
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il) =>
SyntaxSpec m il bl -> [Tok] -> m (Either ParseError bl)
Commonmark.parseCommonmarkWith SyntaxSpec Identity Inlines Blocks
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il) =>
SyntaxSpec m il bl
Commonmark.defaultSyntaxSpec

sourceRangeToIndices :: [Text] -> Commonmark.SourceRange -> Maybe (Int, Int)
sourceRangeToIndices :: [Text] -> SourceRange -> Maybe (Int, Int)
sourceRangeToIndices [Text]
mdLines (Commonmark.SourceRange [(SourcePos, SourcePos)]
srcRange) = do
  let srcRange' :: NonEmpty (SourcePos, SourcePos)
srcRange' = [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a. HasCallStack => [a] -> NonEmpty a
NonEmptyList.fromList [(SourcePos, SourcePos)]
srcRange
      (SourcePos
startPos, SourcePos
_) = NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos)
forall a. NonEmpty a -> a
NonEmptyList.head NonEmpty (SourcePos, SourcePos)
srcRange'
      (SourcePos
_, SourcePos
endPos) = NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos)
forall a. NonEmpty a -> a
NonEmptyList.last NonEmpty (SourcePos, SourcePos)
srcRange'
  Int
startIdx <- [Text] -> SourcePos -> Maybe Int
sourcePosToIndices [Text]
mdLines SourcePos
startPos
  Int
endIdx <- [Text] -> SourcePos -> Maybe Int
sourcePosToIndices [Text]
mdLines SourcePos
endPos
  if Int
startIdx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
endIdx
    then
      (Int, Int) -> Maybe (Int, Int)
forall a. a -> Maybe a
Just (Int
startIdx, Int
endIdx)
    else
      Maybe (Int, Int)
forall a. Maybe a
Nothing

sourcePosToIndices :: [Text] -> Commonmark.SourcePos -> Maybe Int
sourcePosToIndices :: [Text] -> SourcePos -> Maybe Int
sourcePosToIndices [Text]
mdLines SourcePos
srcPos = do
  let line :: Int
line = SourcePos -> Int
Commonmark.sourceLine SourcePos
srcPos
      column :: Int
column = SourcePos -> Int
Commonmark.sourceColumn SourcePos
srcPos
  [Text] -> Int -> Int -> Maybe Int
TextUtil.lineColToIndex [Text]
mdLines Int
line Int
column

blockSourceRangeToIndices :: [Text] -> Commonmark.SourceRange -> Maybe (Int, Int)
blockSourceRangeToIndices :: [Text] -> SourceRange -> Maybe (Int, Int)
blockSourceRangeToIndices [Text]
mdLines (Commonmark.SourceRange [(SourcePos, SourcePos)]
srcRange) = do
  let srcRange' :: NonEmpty (SourcePos, SourcePos)
srcRange' = [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a. HasCallStack => [a] -> NonEmpty a
NonEmptyList.fromList [(SourcePos, SourcePos)]
srcRange
      (SourcePos
startPos, SourcePos
_) = NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos)
forall a. NonEmpty a -> a
NonEmptyList.head NonEmpty (SourcePos, SourcePos)
srcRange'
      (SourcePos
_, SourcePos
endPos) = NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos)
forall a. NonEmpty a -> a
NonEmptyList.last NonEmpty (SourcePos, SourcePos)
srcRange'
  Int
startIdx <- [Text] -> SourcePos -> Maybe Int
sourcePosToIndices [Text]
mdLines SourcePos
startPos
  Int
endIdx <- [Text] -> SourcePos -> Maybe Int
blockEndSourcePosToIndices [Text]
mdLines SourcePos
endPos
  if Int
startIdx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
endIdx
    then
      (Int, Int) -> Maybe (Int, Int)
forall a. a -> Maybe a
Just (Int
startIdx, Int
endIdx)
    else
      Maybe (Int, Int)
forall a. Maybe a
Nothing

blockEndSourcePosToIndices :: [Text] -> Commonmark.SourcePos -> Maybe Int
blockEndSourcePosToIndices :: [Text] -> SourcePos -> Maybe Int
blockEndSourcePosToIndices [Text]
mdLines SourcePos
srcPos = do
  let lineCount :: Int
lineCount = [Text] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
mdLines
  -- For the last block element, Commonmark seems to set the end index
  -- to (number of lines + 1, 1), which is a problematic as there is
  -- no `number of lines +1`th line.
  let rawLine :: Int
rawLine = SourcePos -> Int
Commonmark.sourceLine SourcePos
srcPos
      rawColumn :: Int
rawColumn = SourcePos -> Int
Commonmark.sourceColumn SourcePos
srcPos
      (Int
line, Int
column) =
        if Int
rawLine Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
lineCount Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Bool -> Bool -> Bool
&& Int
rawColumn Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
          then
            (Int
lineCount, Text -> Int
Text.length ([Text] -> Text
forall a. HasCallStack => [a] -> a
last [Text]
mdLines) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          else
            (Int
rawLine, Int
rawColumn)
  [Text] -> Int -> Int -> Maybe Int
TextUtil.lineColToIndex [Text]
mdLines Int
line Int
column

commentChangesWith :: (Text -> Text) -> [Text] -> Blocks -> [((Int, Int), Text)]
commentChangesWith :: (Text -> Text) -> [Text] -> Blocks -> [((Int, Int), Text)]
commentChangesWith Text -> Text
transformComment [Text]
mdLines = Blocks -> [((Int, Int), Text)]
forall b. Data b => b -> [((Int, Int), Text)]
go
  where
    mdText :: Text
mdText = Text -> [Text] -> Text
Text.intercalate Text
"\n" [Text]
mdLines

    go :: (Data b) => b -> [((Int, Int), Text)]
    go :: forall b. Data b => b -> [((Int, Int), Text)]
go b
node
      | Just (SourceRange
srcRange, Text
html) <- b -> Maybe (SourceRange, Text)
forall a. Data a => a -> Maybe (SourceRange, Text)
htmlSourceRangeAndText b
node,
        Text -> Text -> Bool
Text.isPrefixOf Text
"<!--" Text
html =
          let -- Inlines don't have the same quirk as blocks (the (number of lines + 1, 1) end index issue).
              -- Applying `blockSourceRangeToIndices` to the source range for inlines should exhibit the same
              -- behaviour as `sourceRangeToIndices`.
              -- Safety: The Commonmark parser and `blockSourceRangeToIndices` should always return valid indices.
              indices :: (Int, Int)
indices@(Int
startIdx, Int
endIdx) = Maybe (Int, Int) -> (Int, Int)
forall a. HasCallStack => Maybe a -> a
Maybe.fromJust (Maybe (Int, Int) -> (Int, Int)) -> Maybe (Int, Int) -> (Int, Int)
forall a b. (a -> b) -> a -> b
$ [Text] -> SourceRange -> Maybe (Int, Int)
blockSourceRangeToIndices [Text]
mdLines SourceRange
srcRange
              -- Unfortunately, the `html` given by the Commonmark parser leaves out some whitespace for
              -- mutiline inline HTML, so we need to manually extract the full HTML from the original text.
              fullHtml :: Text
fullHtml = Text
mdText Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
Text.take Int
endIdx Text -> (Text -> Text) -> Text
forall a b. a -> (a -> b) -> b
& Int -> Text -> Text
Text.drop Int
startIdx
           in [((Int, Int)
indices, Text -> Text
transformComment Text
fullHtml)]
      | Bool
otherwise = [[((Int, Int), Text)]] -> [((Int, Int), Text)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[((Int, Int), Text)]] -> [((Int, Int), Text)])
-> [[((Int, Int), Text)]] -> [((Int, Int), Text)]
forall a b. (a -> b) -> a -> b
$ (forall b. Data b => b -> [((Int, Int), Text)])
-> b -> [[((Int, Int), Text)]]
forall a u. Data a => (forall d. Data d => d -> u) -> a -> [u]
forall u. (forall d. Data d => d -> u) -> b -> [u]
Data.gmapQ d -> [((Int, Int), Text)]
forall b. Data b => b -> [((Int, Int), Text)]
go b
node

    htmlSourceRangeAndText :: (Data a) => a -> Maybe (Commonmark.SourceRange, Text)
    htmlSourceRangeAndText :: forall a. Data a => a -> Maybe (SourceRange, Text)
htmlSourceRangeAndText a
node
      | Just (Block (RawBlock (Commonmark.Format Text
"html") Text
html) SourceRange
srcRange Map Text Text
_attrs) <- a -> Maybe Block
forall a b. (Typeable a, Typeable b) => a -> Maybe b
Data.cast a
node =
          (SourceRange, Text) -> Maybe (SourceRange, Text)
forall a. a -> Maybe a
Just (SourceRange
srcRange, Text
html)
      | Just (Inline (RawInline (Commonmark.Format Text
"html") Text
html) SourceRange
srcRange Map Text Text
_attrs) <- a -> Maybe Inline
forall a b. (Typeable a, Typeable b) => a -> Maybe b
Data.cast a
node =
          (SourceRange, Text) -> Maybe (SourceRange, Text)
forall a. a -> Maybe a
Just (SourceRange
srcRange, Text
html)
      | Bool
otherwise = Maybe (SourceRange, Text)
forall a. Maybe a
Nothing

-- Because we want to maintain as much of the original formatting as possible,
-- instead of modifying the AST and turning it into text (which would modify some of the formatting
-- since the AST doesn't have enough information about the formatting), we collect the positions of
-- the image links and what to replace them with before performing the replacement using plain text
-- manipulation. That way, we leave everything else untouched.
attachmentChangesWith :: (Text -> Maybe Text) -> [Text] -> Blocks -> [((Int, Int), Text)]
attachmentChangesWith :: (Text -> Maybe Text) -> [Text] -> Blocks -> [((Int, Int), Text)]
attachmentChangesWith Text -> Maybe Text
transformTarget [Text]
mdLines = Blocks -> [((Int, Int), Text)]
forall b. Data b => b -> [((Int, Int), Text)]
go
  where
    mdText :: Text
mdText = Text -> [Text] -> Text
Text.intercalate Text
"\n" [Text]
mdLines
    go :: (Data b) => b -> [((Int, Int), Text)]
    go :: forall b. Data b => b -> [((Int, Int), Text)]
go b
node
      | Just (Inline (Image Text
target Text
_title (Inlines Seq Inline
ils)) SourceRange
srcRange Map Text Text
_attrs) <- b -> Maybe Inline
forall a b. (Typeable a, Typeable b) => a -> Maybe b
Data.cast b
node =
          let unSrcRange :: NonEmpty (SourcePos, SourcePos)
unSrcRange = [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a. HasCallStack => [a] -> NonEmpty a
NonEmptyList.fromList ([(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos))
-> [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a b. (a -> b) -> a -> b
$ SourceRange -> [(SourcePos, SourcePos)]
Commonmark.unSourceRange SourceRange
srcRange
              -- Find the end index of the image's inlines, or if there are no inlines,
              -- then just use the image's start index. This index is used as the starting
              -- point from which to search for the target text.
              -- Safety: This should never fail since the indices are valid.
              searchStart :: Int
searchStart = Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
Maybe.fromJust (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ [Text] -> SourcePos -> Maybe Int
sourcePosToIndices [Text]
mdLines (SourcePos -> Maybe Int) -> SourcePos -> Maybe Int
forall a b. (a -> b) -> a -> b
$ case Seq Inline
ils of
                Seq Inline
_ :|> (Inline InlineType
_ (Commonmark.SourceRange [(SourcePos, SourcePos)]
sr) Map Text Text
_) -> (SourcePos, SourcePos) -> SourcePos
forall a b. (a, b) -> b
snd ((SourcePos, SourcePos) -> SourcePos)
-> (SourcePos, SourcePos) -> SourcePos
forall a b. (a -> b) -> a -> b
$ [(SourcePos, SourcePos)] -> (SourcePos, SourcePos)
forall a. HasCallStack => [a] -> a
last [(SourcePos, SourcePos)]
sr
                Seq Inline
Sequence.Empty -> (SourcePos, SourcePos) -> SourcePos
forall a b. (a, b) -> a
fst ((SourcePos, SourcePos) -> SourcePos)
-> (SourcePos, SourcePos) -> SourcePos
forall a b. (a -> b) -> a -> b
$ NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos)
forall a. NonEmpty a -> a
NonEmptyList.head NonEmpty (SourcePos, SourcePos)
unSrcRange
           in Maybe ((Int, Int), Text) -> [((Int, Int), Text)]
forall a. Maybe a -> [a]
Maybe.maybeToList (Maybe ((Int, Int), Text) -> [((Int, Int), Text)])
-> Maybe ((Int, Int), Text) -> [((Int, Int), Text)]
forall a b. (a -> b) -> a -> b
$ SourceRange -> Text -> Int -> Maybe ((Int, Int), Text)
tryMkChange SourceRange
srcRange Text
target Int
searchStart
      | Just (Block (ReferenceLinkDefinition Text
label (Text
target, Text
_title)) SourceRange
srcRange Map Text Text
_attrs) <- b -> Maybe Block
forall a b. (Typeable a, Typeable b) => a -> Maybe b
Data.cast b
node =
          let unSrcRange :: NonEmpty (SourcePos, SourcePos)
unSrcRange = [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a. HasCallStack => [a] -> NonEmpty a
NonEmptyList.fromList ([(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos))
-> [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a b. (a -> b) -> a -> b
$ SourceRange -> [(SourcePos, SourcePos)]
Commonmark.unSourceRange SourceRange
srcRange
              -- Safety: This should never fail since the indices are valid.
              -- We're simply adding the length of the label to the start index of the reference link definition.
              -- The length of the label is added to "skip" the label.
              searchStart :: Int
searchStart = Text -> Int
Text.length Text
label Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
Maybe.fromJust ([Text] -> SourcePos -> Maybe Int
sourcePosToIndices [Text]
mdLines ((SourcePos, SourcePos) -> SourcePos
forall a b. (a, b) -> a
fst ((SourcePos, SourcePos) -> SourcePos)
-> (SourcePos, SourcePos) -> SourcePos
forall a b. (a -> b) -> a -> b
$ NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos)
forall a. NonEmpty a -> a
NonEmptyList.head NonEmpty (SourcePos, SourcePos)
unSrcRange))
           in Maybe ((Int, Int), Text) -> [((Int, Int), Text)]
forall a. Maybe a -> [a]
Maybe.maybeToList (Maybe ((Int, Int), Text) -> [((Int, Int), Text)])
-> Maybe ((Int, Int), Text) -> [((Int, Int), Text)]
forall a b. (a -> b) -> a -> b
$ SourceRange -> Text -> Int -> Maybe ((Int, Int), Text)
tryMkChange SourceRange
srcRange Text
target Int
searchStart
      | Bool
otherwise = [[((Int, Int), Text)]] -> [((Int, Int), Text)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[((Int, Int), Text)]] -> [((Int, Int), Text)])
-> [[((Int, Int), Text)]] -> [((Int, Int), Text)]
forall a b. (a -> b) -> a -> b
$ (forall b. Data b => b -> [((Int, Int), Text)])
-> b -> [[((Int, Int), Text)]]
forall a u. Data a => (forall d. Data d => d -> u) -> a -> [u]
forall u. (forall d. Data d => d -> u) -> b -> [u]
Data.gmapQ d -> [((Int, Int), Text)]
forall b. Data b => b -> [((Int, Int), Text)]
go b
node

    -- Finds the start and end indices of an image target and constructs the replacement.
    --
    -- If `transformTarget` fails, then we know this is not an attachment target.
    -- `findSliceBetween` should never fail for a reference link definition. However,
    -- it can fail for an image, in which case the target must be defined in a reference link definition.
    -- In other words, the `Nothing` case of `tryMkChange` doesn't indicate failure, but simply means that
    -- there is nothing we need to replace.
    tryMkChange :: Commonmark.SourceRange -> Text -> Int -> Maybe ((Int, Int), Text)
    tryMkChange :: SourceRange -> Text -> Int -> Maybe ((Int, Int), Text)
tryMkChange SourceRange
srcRange Text
target Int
searchStart = do
      Text
transformedTarget <- Text -> Maybe Text
transformTarget Text
target

      -- Now, we need to find the start and end indices of the image target.
      let unSrcRange :: NonEmpty (SourcePos, SourcePos)
unSrcRange = [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a. HasCallStack => [a] -> NonEmpty a
NonEmptyList.fromList ([(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos))
-> [(SourcePos, SourcePos)] -> NonEmpty (SourcePos, SourcePos)
forall a b. (a -> b) -> a -> b
$ SourceRange -> [(SourcePos, SourcePos)]
Commonmark.unSourceRange SourceRange
srcRange
          -- Safety: This should never fail since the indices are valid.
          searchEnd :: Int
searchEnd = Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
Maybe.fromJust (Maybe Int -> Int)
-> (NonEmpty (SourcePos, SourcePos) -> Maybe Int)
-> NonEmpty (SourcePos, SourcePos)
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> SourcePos -> Maybe Int
sourcePosToIndices [Text]
mdLines (SourcePos -> Maybe Int)
-> (NonEmpty (SourcePos, SourcePos) -> SourcePos)
-> NonEmpty (SourcePos, SourcePos)
-> Maybe Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (SourcePos, SourcePos) -> SourcePos
forall a b. (a, b) -> b
snd ((SourcePos, SourcePos) -> SourcePos)
-> (NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos))
-> NonEmpty (SourcePos, SourcePos)
-> SourcePos
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty (SourcePos, SourcePos) -> (SourcePos, SourcePos)
forall a. NonEmpty a -> a
NonEmptyList.last (NonEmpty (SourcePos, SourcePos) -> Int)
-> NonEmpty (SourcePos, SourcePos) -> Int
forall a b. (a -> b) -> a -> b
$ NonEmpty (SourcePos, SourcePos)
unSrcRange

      (Int, Int)
indices <- Int -> Int -> Text -> Text -> Maybe (Int, Int)
TextUtil.findSliceBetween Int
searchStart Int
searchEnd Text
mdText Text
target
      ((Int, Int), Text) -> Maybe ((Int, Int), Text)
forall a. a -> Maybe a
Just ((Int, Int)
indices, Text
transformedTarget)