{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
{- |
   Module      : Text.Pandoc.Readers.Mdoc.Lex
   Copyright   : Copyright (C) 2024 Evan Silberman
   License     : GNU GPL, version 2 or above

   Maintainer  : Evan Silberman <evan@jklol.net>
   Stability   : WIP
   Portability : portable

Tokenizer for mdoc
-}
module Text.Pandoc.Readers.Mdoc.Lex
  ( MdocToken(..)
  , MdocTokens(..)
  , DelimSide(..)
  , lexMdoc
  , toString
  )
where

import Control.Monad (void, guard, when)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class.PandocMonad (PandocMonad(..))
import Data.Char (isAlphaNum)
import Data.Maybe (isJust)
import qualified Data.Text as T
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.Roff.Escape
import Text.Pandoc.Readers.Mdoc.Macros
import qualified Data.Sequence as Seq

-- As a higher level language with a wealth of semantic macros, mdoc
-- discourages authors from falling back to low-level roff features like font
-- selection, custom macros, defined strings, etc. Pandoc's mdoc reader is
-- accordingly implemented as a high-level interpreter of mdoc's semantic macros
-- and almost no raw roff requests are supported.
--
-- tbl(7) and eqn(7) macros are rare but not completely unseen in mdoc manuals.
-- they are not yet implemented. most use of tbl macros in mdoc could probably
-- be replaced with .Bl -column

data DelimSide = Open | Middle | Close deriving (Int -> DelimSide -> ShowS
[DelimSide] -> ShowS
DelimSide -> String
(Int -> DelimSide -> ShowS)
-> (DelimSide -> String)
-> ([DelimSide] -> ShowS)
-> Show DelimSide
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> DelimSide -> ShowS
showsPrec :: Int -> DelimSide -> ShowS
$cshow :: DelimSide -> String
show :: DelimSide -> String
$cshowList :: [DelimSide] -> ShowS
showList :: [DelimSide] -> ShowS
Show, DelimSide -> DelimSide -> Bool
(DelimSide -> DelimSide -> Bool)
-> (DelimSide -> DelimSide -> Bool) -> Eq DelimSide
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: DelimSide -> DelimSide -> Bool
== :: DelimSide -> DelimSide -> Bool
$c/= :: DelimSide -> DelimSide -> Bool
/= :: DelimSide -> DelimSide -> Bool
Eq)

-- | Tokens for Mdoc documents
data MdocToken = Str T.Text SourcePos -- ^ The contents of a text line
               | Macro T.Text SourcePos  -- ^ A macro to be processed
               | Lit T.Text SourcePos  -- ^ Literal text on a control line
               | Blank SourcePos  -- ^ A blank line
               | Delim DelimSide T.Text SourcePos  -- ^ A delimiter character
               | Eol  -- ^ The end of a control line
               deriving Int -> MdocToken -> ShowS
[MdocToken] -> ShowS
MdocToken -> String
(Int -> MdocToken -> ShowS)
-> (MdocToken -> String)
-> ([MdocToken] -> ShowS)
-> Show MdocToken
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> MdocToken -> ShowS
showsPrec :: Int -> MdocToken -> ShowS
$cshow :: MdocToken -> String
show :: MdocToken -> String
$cshowList :: [MdocToken] -> ShowS
showList :: [MdocToken] -> ShowS
Show

toString :: MdocToken -> T.Text
toString :: MdocToken -> Text
toString (Str Text
x SourcePos
_) = Text
x
toString (Macro Text
x SourcePos
_) = Text
x
toString (Lit Text
x SourcePos
_) = Text
x
toString (Delim DelimSide
_ Text
x SourcePos
_) = Text
x
toString Blank{} = Text
forall a. Monoid a => a
mempty
toString MdocToken
Eol = Text
forall a. Monoid a => a
mempty

newtype MdocTokens = MdocTokens { MdocTokens -> Seq MdocToken
unMdocTokens :: Seq.Seq MdocToken }
        deriving (Int -> MdocTokens -> ShowS
[MdocTokens] -> ShowS
MdocTokens -> String
(Int -> MdocTokens -> ShowS)
-> (MdocTokens -> String)
-> ([MdocTokens] -> ShowS)
-> Show MdocTokens
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> MdocTokens -> ShowS
showsPrec :: Int -> MdocTokens -> ShowS
$cshow :: MdocTokens -> String
show :: MdocTokens -> String
$cshowList :: [MdocTokens] -> ShowS
showList :: [MdocTokens] -> ShowS
Show, NonEmpty MdocTokens -> MdocTokens
MdocTokens -> MdocTokens -> MdocTokens
(MdocTokens -> MdocTokens -> MdocTokens)
-> (NonEmpty MdocTokens -> MdocTokens)
-> (forall b. Integral b => b -> MdocTokens -> MdocTokens)
-> Semigroup MdocTokens
forall b. Integral b => b -> MdocTokens -> MdocTokens
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
$c<> :: MdocTokens -> MdocTokens -> MdocTokens
<> :: MdocTokens -> MdocTokens -> MdocTokens
$csconcat :: NonEmpty MdocTokens -> MdocTokens
sconcat :: NonEmpty MdocTokens -> MdocTokens
$cstimes :: forall b. Integral b => b -> MdocTokens -> MdocTokens
stimes :: forall b. Integral b => b -> MdocTokens -> MdocTokens
Semigroup, Semigroup MdocTokens
MdocTokens
Semigroup MdocTokens =>
MdocTokens
-> (MdocTokens -> MdocTokens -> MdocTokens)
-> ([MdocTokens] -> MdocTokens)
-> Monoid MdocTokens
[MdocTokens] -> MdocTokens
MdocTokens -> MdocTokens -> MdocTokens
forall a.
Semigroup a =>
a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
$cmempty :: MdocTokens
mempty :: MdocTokens
$cmappend :: MdocTokens -> MdocTokens -> MdocTokens
mappend :: MdocTokens -> MdocTokens -> MdocTokens
$cmconcat :: [MdocTokens] -> MdocTokens
mconcat :: [MdocTokens] -> MdocTokens
Monoid)

singleTok :: MdocToken -> MdocTokens
singleTok :: MdocToken -> MdocTokens
singleTok MdocToken
t = Seq MdocToken -> MdocTokens
MdocTokens (MdocToken -> Seq MdocToken
forall a. a -> Seq a
Seq.singleton MdocToken
t)

type Lexer m = ParsecT Sources () m

instance RoffLikeLexer MdocTokens where
  -- This is a bit confusing. We're lexing to MdocTokens, but for escaping
  -- purposes we just want Texts.
  type Token MdocTokens = T.Text
  -- We don't need a state
  type State MdocTokens = ()
  -- We don't support predefined string expansion
  expandString :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens ()
expandString = () -> ParsecT Sources () m ()
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
  escString :: forall (m :: * -> *).
PandocMonad m =>
Lexer m MdocTokens (Token MdocTokens)
escString = Text -> ParsecT Sources () m Text
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
forall a. Monoid a => a
mempty
  -- what token type the unescaped text gets wrapped in is decided by other
  -- parts of the lexer.
  emit :: Text -> Token MdocTokens
emit = Text -> Text
Text -> Token MdocTokens
forall a. a -> a
id
  -- All escapes are resolved in the lexer and we never need to emit anything,
  -- vs. the roff lexer which has to push the backlashes to the output while
  -- in copy mode.
  backslash :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens ()
backslash = (ParsecT Sources () m ()
forall a. Monoid a => a
mempty ParsecT Sources () m ()
-> ParsecT Sources () m Char -> ParsecT Sources () m ()
forall a b.
ParsecT Sources () m a
-> ParsecT Sources () m b -> ParsecT Sources () m a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\\') ParsecT Sources () m ()
-> ParsecT Sources () m () -> ParsecT Sources () m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (ParsecT Sources () m ()
forall a. Monoid a => a
mempty ParsecT Sources () m ()
-> ParsecT Sources () m String -> ParsecT Sources () m ()
forall a b.
ParsecT Sources () m a
-> ParsecT Sources () m b -> ParsecT Sources () m a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* String -> ParsecT Sources () m String
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m String
string String
"\\E")
  -- We don't support macro definition and we don't output anything for \A
  checkDefined :: forall (m :: * -> *).
PandocMonad m =>
Text -> Lexer m MdocTokens (Token MdocTokens)
checkDefined = ParsecT Sources () m Text -> Text -> ParsecT Sources () m Text
forall a b. a -> b -> a
const ParsecT Sources () m Text
forall a. Monoid a => a
mempty
  -- We don't support copy mode and \E is treated as backslash
  escE :: forall (m :: * -> *).
PandocMonad m =>
Lexer m MdocTokens (Token MdocTokens)
escE = Text -> ParsecT Sources () m Text
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
forall a. Monoid a => a
mempty
  -- We don't support low-level font selection
  escFont :: forall (m :: * -> *).
PandocMonad m =>
Lexer m MdocTokens (Token MdocTokens)
escFont = Char
-> [Lexer m MdocTokens Text]
-> ParsecT Sources (State MdocTokens) m (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Char -> [Lexer m x Text] -> Lexer m x (Token x)
escIgnore Char
'f' [Lexer m MdocTokens Text
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x Text
escapeArg, Int -> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char, Monad m) =>
Int -> ParsecT s st m Char -> ParsecT s st m Text
countChar Int
1 ((Char -> Bool) -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n'))]

eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m MdocToken
eofline :: forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m MdocToken
eofline = do
  ParsecT s u m Char -> ParsecT s u m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
newline ParsecT s u m () -> ParsecT s u m () -> ParsecT s u m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT s u m ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof
  MdocToken -> ParsecT s u m MdocToken
forall a. a -> ParsecT s u m a
forall (m :: * -> *) a. Monad m => a -> m a
return MdocToken
Eol

lexComment :: PandocMonad m => Lexer m MdocTokens
lexComment :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexComment = do
  ParsecT Sources () m String -> ParsecT Sources () m String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT Sources () m String -> ParsecT Sources () m String)
-> ParsecT Sources () m String -> ParsecT Sources () m String
forall a b. (a -> b) -> a -> b
$ String -> ParsecT Sources () m String
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m String
string String
".\\\""
  ParsecT Sources () m Char -> ParsecT Sources () m ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany (ParsecT Sources () m Char -> ParsecT Sources () m ())
-> ParsecT Sources () m Char -> ParsecT Sources () m ()
forall a b. (a -> b) -> a -> b
$ String -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
noneOf String
"\n"
  ParsecT Sources () m MdocToken
forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m MdocToken
eofline
  MdocTokens -> Lexer m MdocTokens
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return MdocTokens
forall a. Monoid a => a
mempty

argText :: PandocMonad m => Lexer m T.Text
argText :: forall (m :: * -> *). PandocMonad m => Lexer m Text
argText = do
  Text
beg <- Lexer m Text
Lexer m MdocTokens (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x (Token x)
escape Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText
  Text
end <- [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> ParsecT Sources () m [Text] -> Lexer m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Lexer m Text -> ParsecT Sources () m [Text]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (Lexer m Text
Lexer m MdocTokens (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x (Token x)
escape Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar)
  Text -> Lexer m Text
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Lexer m Text) -> Text -> Lexer m Text
forall a b. (a -> b) -> a -> b
$ Text
beg Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
end

spaceTabChar :: PandocMonad m => Lexer m T.Text
spaceTabChar :: forall (m :: * -> *). PandocMonad m => Lexer m Text
spaceTabChar = Char -> Text
T.singleton (Char -> Text)
-> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Sources () m Char
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Char
spaceChar

quotedArg :: PandocMonad m => Lexer m T.Text
quotedArg :: forall (m :: * -> *). PandocMonad m => Lexer m Text
quotedArg = do
  Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar
  Text
t <- [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> ParsecT Sources () m [Text] -> Lexer m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Lexer m Text -> ParsecT Sources () m [Text]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try Lexer m Text
forall {u}. ParsecT Sources u m Text
innerQuote Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
Lexer m MdocTokens (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x (Token x)
escape Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
spaceTabChar)
  Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar
  Lexer m Text -> ParsecT Sources () m ()
forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar
  Text -> Lexer m Text
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
t
  where
    innerQuote :: ParsecT Sources u m Text
innerQuote = do
      String -> ParsecT Sources u m String
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m String
string String
"\"\""
      Text -> ParsecT Sources u m Text
forall a. a -> ParsecT Sources u m a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"\""

anyText :: PandocMonad m => Lexer m T.Text
anyText :: forall (m :: * -> *). PandocMonad m => Lexer m Text
anyText = ParsecT Sources () m Text
Lexer m MdocTokens (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x (Token x)
escape ParsecT Sources () m Text
-> ParsecT Sources () m Text -> ParsecT Sources () m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText ParsecT Sources () m Text
-> ParsecT Sources () m Text -> ParsecT Sources () m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar ParsecT Sources () m Text
-> ParsecT Sources () m Text -> ParsecT Sources () m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
spaceTabChar

regularText :: PandocMonad m => Lexer m T.Text
regularText :: forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText = ParsecT Sources () m Char -> ParsecT Sources () m Text
forall s (m :: * -> *) t st.
Stream s m t =>
ParsecT s st m Char -> ParsecT s st m Text
many1Char (ParsecT Sources () m Char -> ParsecT Sources () m Text)
-> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall a b. (a -> b) -> a -> b
$ String -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
noneOf String
"\n\r\t \\\""

quoteChar :: PandocMonad m => Lexer m T.Text
quoteChar :: forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar = Char -> Text
T.singleton (Char -> Text)
-> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'"'

mdocToken :: PandocMonad m => Lexer m MdocTokens
mdocToken :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
mdocToken = Lexer m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexComment Lexer m MdocTokens -> Lexer m MdocTokens -> Lexer m MdocTokens
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexControlLine Lexer m MdocTokens -> Lexer m MdocTokens -> Lexer m MdocTokens
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexTextLine

lexMacroName :: PandocMonad m => Lexer m T.Text
lexMacroName :: forall (m :: * -> *). PandocMonad m => Lexer m Text
lexMacroName = ParsecT Sources () m Char -> ParsecT Sources () m Text
forall s (m :: * -> *) t st.
Stream s m t =>
ParsecT s st m Char -> ParsecT s st m Text
many1Char ((Char -> Bool) -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isMacroChar)
  where
    isMacroChar :: Char -> Bool
isMacroChar Char
'%' = Bool
True
    isMacroChar Char
x = Char -> Bool
isAlphaNum Char
x

lexMacro :: PandocMonad m => Lexer m MdocToken
lexMacro :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexMacro = do
  SourcePos
pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  Text
name <- Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
lexMacroName
  ParsecT Sources () m ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof ParsecT Sources () m ()
-> ParsecT Sources () m () -> ParsecT Sources () m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Char -> ParsecT Sources () m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Sources () m Char -> ParsecT Sources () m Char
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (ParsecT Sources () m Char
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Char
spaceChar ParsecT Sources () m Char
-> ParsecT Sources () m Char -> ParsecT Sources () m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
newline))
  ParsecT Sources () m ()
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
skipSpaces
  MdocToken -> Lexer m MdocToken
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (MdocToken -> Lexer m MdocToken) -> MdocToken -> Lexer m MdocToken
forall a b. (a -> b) -> a -> b
$ Text -> SourcePos -> MdocToken
Macro Text
name SourcePos
pos

lexCallableMacro :: PandocMonad m => Lexer m MdocToken
lexCallableMacro :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexCallableMacro = do
  SourcePos
pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  Maybe Text
q <- ParsecT Sources () m Text -> ParsecT Sources () m (Maybe Text)
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m (Maybe a)
optionMaybe ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar
  Text
name <- ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
lexMacroName
  Bool -> ParsecT Sources () m () -> ParsecT Sources () m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Maybe Text -> Bool
forall a. Maybe a -> Bool
isJust Maybe Text
q) (ParsecT Sources () m Text -> ParsecT Sources () m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar)
  ParsecT Sources () m ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof ParsecT Sources () m ()
-> ParsecT Sources () m () -> ParsecT Sources () m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Char -> ParsecT Sources () m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Sources () m Char -> ParsecT Sources () m Char
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (ParsecT Sources () m Char
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Char
spaceChar ParsecT Sources () m Char
-> ParsecT Sources () m Char -> ParsecT Sources () m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
newline))
  ParsecT Sources () m ()
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
skipSpaces
  Bool -> ParsecT Sources () m ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Sources () m ())
-> Bool -> ParsecT Sources () m ()
forall a b. (a -> b) -> a -> b
$ Text -> Bool
isCallableMacro Text
name
  MdocToken -> Lexer m MdocToken
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (MdocToken -> Lexer m MdocToken) -> MdocToken -> Lexer m MdocToken
forall a b. (a -> b) -> a -> b
$ Text -> SourcePos -> MdocToken
Macro Text
name SourcePos
pos

lexDelim :: (PandocMonad m) => Lexer m MdocToken
lexDelim :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexDelim = do
  SourcePos
pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  Maybe Text
q <- ParsecT Sources () m Text -> ParsecT Sources () m (Maybe Text)
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m (Maybe a)
optionMaybe ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar
  SourcePos -> MdocToken
t <-
    DelimSide -> Text -> SourcePos -> MdocToken
Delim DelimSide
Open (Text -> SourcePos -> MdocToken)
-> ParsecT Sources () m Text
-> ParsecT Sources () m (SourcePos -> MdocToken)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text] -> ParsecT Sources () m Text
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
[Text] -> ParsecT s st m Text
oneOfStrings [Text
"(", Text
"["]
      ParsecT Sources () m (SourcePos -> MdocToken)
-> ParsecT Sources () m (SourcePos -> MdocToken)
-> ParsecT Sources () m (SourcePos -> MdocToken)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> DelimSide -> Text -> SourcePos -> MdocToken
Delim DelimSide
Close (Text -> SourcePos -> MdocToken)
-> ParsecT Sources () m Text
-> ParsecT Sources () m (SourcePos -> MdocToken)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text] -> ParsecT Sources () m Text
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
[Text] -> ParsecT s st m Text
oneOfStrings [Text
".", Text
",", Text
":", Text
";", Text
")", Text
"]", Text
"?", Text
"!"]
      ParsecT Sources () m (SourcePos -> MdocToken)
-> ParsecT Sources () m (SourcePos -> MdocToken)
-> ParsecT Sources () m (SourcePos -> MdocToken)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> DelimSide -> Text -> SourcePos -> MdocToken
Delim DelimSide
Middle (Text -> SourcePos -> MdocToken)
-> ParsecT Sources () m Text
-> ParsecT Sources () m (SourcePos -> MdocToken)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> ParsecT Sources () m Text
forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
Text -> ParsecT s u m Text
textStr Text
"|"
  Bool -> ParsecT Sources () m () -> ParsecT Sources () m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Maybe Text -> Bool
forall a. Maybe a -> Bool
isJust Maybe Text
q) (ParsecT Sources () m Text -> ParsecT Sources () m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar)
  ParsecT Sources () m ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof ParsecT Sources () m ()
-> ParsecT Sources () m () -> ParsecT Sources () m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Char -> ParsecT Sources () m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Sources () m Char -> ParsecT Sources () m Char
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (ParsecT Sources () m Char
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Char
spaceChar ParsecT Sources () m Char
-> ParsecT Sources () m Char -> ParsecT Sources () m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
newline))
  ParsecT Sources () m ()
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
skipSpaces
  MdocToken -> Lexer m MdocToken
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (MdocToken -> Lexer m MdocToken) -> MdocToken -> Lexer m MdocToken
forall a b. (a -> b) -> a -> b
$ SourcePos -> MdocToken
t SourcePos
pos

lexLit :: PandocMonad m => Lexer m MdocToken
lexLit :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexLit = do
  SourcePos
pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  Text
t <- Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
argText Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quotedArg
  ParsecT Sources () m ()
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
skipSpaces
  MdocToken -> Lexer m MdocToken
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (MdocToken -> Lexer m MdocToken) -> MdocToken -> Lexer m MdocToken
forall a b. (a -> b) -> a -> b
$ Text -> SourcePos -> MdocToken
Lit Text
t SourcePos
pos

lexTextLine :: PandocMonad m => Lexer m MdocTokens
lexTextLine :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexTextLine = do
  SourcePos
pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  Bool -> ParsecT Sources () m ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Sources () m ())
-> Bool -> ParsecT Sources () m ()
forall a b. (a -> b) -> a -> b
$ SourcePos -> Int
sourceColumn SourcePos
pos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
  Text
t <- [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text)
-> ParsecT Sources () m [Text] -> ParsecT Sources () m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Sources () m Text -> ParsecT Sources () m [Text]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
anyText
  ParsecT Sources () m MdocToken
forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m MdocToken
eofline
  if Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
t
     then MdocTokens -> Lexer m MdocTokens
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (MdocTokens -> Lexer m MdocTokens)
-> MdocTokens -> Lexer m MdocTokens
forall a b. (a -> b) -> a -> b
$ MdocToken -> MdocTokens
singleTok (MdocToken -> MdocTokens) -> MdocToken -> MdocTokens
forall a b. (a -> b) -> a -> b
$ SourcePos -> MdocToken
Blank SourcePos
pos
     else MdocTokens -> Lexer m MdocTokens
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (MdocTokens -> Lexer m MdocTokens)
-> MdocTokens -> Lexer m MdocTokens
forall a b. (a -> b) -> a -> b
$ MdocToken -> MdocTokens
singleTok (MdocToken -> MdocTokens) -> MdocToken -> MdocTokens
forall a b. (a -> b) -> a -> b
$ Text -> SourcePos -> MdocToken
Str Text
t SourcePos
pos

lexControlLine :: PandocMonad m => Lexer m MdocTokens
lexControlLine :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexControlLine = do
  SourcePos
pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  Bool -> ParsecT Sources () m ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> ParsecT Sources () m ())
-> Bool -> ParsecT Sources () m ()
forall a b. (a -> b) -> a -> b
$ SourcePos -> Int
sourceColumn SourcePos
pos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
  Char -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'.'
  ParsecT Sources () m MdocToken
forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m MdocToken
eofline ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocTokens
-> ParsecT Sources () m MdocTokens
forall a b.
ParsecT Sources () m a
-> ParsecT Sources () m b -> ParsecT Sources () m b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Sources () m MdocTokens
forall a. Monoid a => a
mempty ParsecT Sources () m MdocTokens
-> ParsecT Sources () m MdocTokens
-> ParsecT Sources () m MdocTokens
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> do
    m :: MdocToken
m@(Macro Text
name SourcePos
_) <- ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexMacro
    -- .Ns macros at the start of a line are ignored. We'd have to look behind
    -- to keep track of the "start of the line" in the parser, so we'll drop
    -- those macros in lexing.
    let start :: [MdocToken]
start | Text
name Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"Ns" = []
              | Bool
otherwise = [MdocToken
m]
    let parsed :: Bool
parsed = Text -> Bool
isParsedMacro Text
name
    ([MdocToken]
wds, MdocToken
e) <- ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocToken
-> ParsecT Sources () m ([MdocToken], MdocToken)
forall s u (m :: * -> *) a b.
ParsecT s u m a -> ParsecT s u m b -> ParsecT s u m ([a], b)
manyUntil (Bool -> ParsecT Sources () m MdocToken
forall {m :: * -> *}.
PandocMonad m =>
Bool -> ParsecT Sources () m MdocToken
l Bool
parsed) ParsecT Sources () m MdocToken
forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m MdocToken
eofline
    MdocTokens -> ParsecT Sources () m MdocTokens
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return (MdocTokens -> ParsecT Sources () m MdocTokens)
-> MdocTokens -> ParsecT Sources () m MdocTokens
forall a b. (a -> b) -> a -> b
$ Seq MdocToken -> MdocTokens
MdocTokens (Seq MdocToken -> MdocTokens) -> Seq MdocToken -> MdocTokens
forall a b. (a -> b) -> a -> b
$ [MdocToken] -> Seq MdocToken
forall a. [a] -> Seq a
Seq.fromList ([MdocToken] -> Seq MdocToken) -> [MdocToken] -> Seq MdocToken
forall a b. (a -> b) -> a -> b
$ [MdocToken]
start [MdocToken] -> [MdocToken] -> [MdocToken]
forall a. Semigroup a => a -> a -> a
<> [MdocToken]
wds [MdocToken] -> [MdocToken] -> [MdocToken]
forall a. Semigroup a => a -> a -> a
<> [MdocToken
e]
      where
        l :: Bool -> ParsecT Sources () m MdocToken
l Bool
True = ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexDelim ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexCallableMacro ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexLit
        l Bool
False = ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexDelim ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexLit

-- | Tokenize a string as a sequence of mdoc tokens.
lexMdoc :: PandocMonad m => SourcePos -> T.Text -> m MdocTokens
lexMdoc :: forall (m :: * -> *).
PandocMonad m =>
SourcePos -> Text -> m MdocTokens
lexMdoc SourcePos
pos Text
txt = do
  Either PandocError MdocTokens
eithertokens <- ParsecT Sources () m MdocTokens
-> () -> Text -> m (Either PandocError MdocTokens)
forall (m :: * -> *) t st a.
(Monad m, ToSources t) =>
ParsecT Sources st m a -> st -> t -> m (Either PandocError a)
readWithM (do SourcePos -> ParsecT Sources () m ()
forall (m :: * -> *) s u. Monad m => SourcePos -> ParsecT s u m ()
setPosition SourcePos
pos
                                [MdocTokens] -> MdocTokens
forall a. Monoid a => [a] -> a
mconcat ([MdocTokens] -> MdocTokens)
-> ParsecT Sources () m [MdocTokens]
-> ParsecT Sources () m MdocTokens
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Sources () m MdocTokens
-> ParsecT Sources () m () -> ParsecT Sources () m [MdocTokens]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill ParsecT Sources () m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
mdocToken ParsecT Sources () m ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof) ()
forall a. Default a => a
def Text
txt
  case Either PandocError MdocTokens
eithertokens of
    Left PandocError
e       -> PandocError -> m MdocTokens
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError PandocError
e
    Right MdocTokens
tokenz -> MdocTokens -> m MdocTokens
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return MdocTokens
tokenz