{-# LANGUAGE CPP #-}

{- |
   Module      : Data.GraphViz.PreProcessing
   Description : Pre-process imported Dot code.
   Copyright   : (c) Ivan Lazar Miljenovic
   License     : 3-Clause BSD-style
   Maintainer  : Ivan.Miljenovic@gmail.com

   \"Real life\" Dot code contains various items that are not directly
   parseable by this library.  This module defines the 'preProcess'
   function to remove these components, which include:

     * Comments (both @\/\* ... *\/@ style and @\/\/ ... @ style);

     * Pre-processor lines (lines starting with a @#@);

     * Split lines (by inserting a @\\@ the rest of that \"line\" is
       continued on the next line).

     * Strings concatenated together using @\"...\" + \"...\"@; these
       are concatenated into one big string.
-}
module Data.GraphViz.PreProcessing(preProcess) where

import Data.GraphViz.Exception (GraphvizException (NotDotCode), throw)
import Data.GraphViz.Parsing

import           Data.Text.Lazy         (Text)
import qualified Data.Text.Lazy         as T
import           Data.Text.Lazy.Builder (Builder)
import qualified Data.Text.Lazy.Builder as B

#if !(MIN_VERSION_base(4,8,0))
import Data.Monoid (Monoid (..), mconcat)
#endif

-- -----------------------------------------------------------------------------
-- Filtering out unwanted Dot items such as comments

-- | Remove unparseable features of Dot, such as comments and
--   multi-line strings (which are converted to single-line strings).
preProcess :: Text -> Text
preProcess :: Text -> Text
preProcess Text
t = case (Either String Builder, Text) -> Either String Builder
forall a b. (a, b) -> a
fst ((Either String Builder, Text) -> Either String Builder)
-> (Either String Builder, Text) -> Either String Builder
forall a b. (a -> b) -> a -> b
$ Parse Builder -> Text -> (Either String Builder, Text)
forall a. Parse a -> Text -> (Either String a, Text)
runParser Parse Builder
parseOutUnwanted Text
t of
                 (Right Builder
r) -> Builder -> Text
B.toLazyText Builder
r
                 (Left String
l)  -> GraphvizException -> Text
forall a e. Exception e => e -> a
throw (String -> GraphvizException
NotDotCode String
l)
               -- snd should be null

-- | Parse out comments and make quoted strings spread over multiple
--   lines only over a single line.  Should parse the /entire/ input
--   'Text'.
parseOutUnwanted :: Parse Builder
parseOutUnwanted :: Parse Builder
parseOutUnwanted = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> Parser GraphvizState [Builder] -> Parse Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parse Builder -> Parser GraphvizState [Builder]
forall a. Parser GraphvizState a -> Parser GraphvizState [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parse Builder
getNext
  where
    getNext :: Parse Builder
getNext = Parse Builder
forall {s}. Parser s Builder
parseOK
              Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
              Parse Builder
parseConcatStrings
              Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
              Parse Builder
parseHTML
              Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
              Parse Builder
forall m. Monoid m => Parse m
parseUnwanted
              Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
              (Char -> Builder) -> Parser GraphvizState Char -> Parse Builder
forall a b.
(a -> b) -> Parser GraphvizState a -> Parser GraphvizState b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Builder
B.singleton Parser GraphvizState Char
forall s. Parser s Char
next

    parseOK :: Parser s Builder
parseOK = Text -> Builder
B.fromLazyText
              (Text -> Builder) -> Parser s Text -> Parser s Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser s Text
forall s. (Char -> Bool) -> Parser s Text
many1Satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Char
'\n', Char
'\r', Char
'\\', Char
'/', Char
'"', Char
'<'])

-- | Parses an unwanted part of the Dot code (comments and
--   pre-processor lines; also un-splits lines).
parseUnwanted :: (Monoid m) => Parse m
parseUnwanted :: forall m. Monoid m => Parse m
parseUnwanted = [Parser GraphvizState m] -> Parser GraphvizState m
forall (p :: * -> *) a. PolyParse p => [p a] -> p a
oneOf [ Parser GraphvizState m
forall m. Monoid m => Parse m
parseLineComment
                      , Parser GraphvizState m
forall m. Monoid m => Parse m
parseMultiLineComment
                      , Parser GraphvizState m
forall m. Monoid m => Parse m
parsePreProcessor
                      , Parser GraphvizState m
forall m. Monoid m => Parse m
parseSplitLine
                      ]

-- | Remove pre-processor lines (that is, those that start with a
--   @#@).  Will consume the newline from the beginning of the
--   previous line, but will leave the one from the pre-processor line
--   there (so in the end it just removes the line).
parsePreProcessor :: (Monoid m) => Parse m
parsePreProcessor :: forall m. Monoid m => Parse m
parsePreProcessor = Parse ()
newline Parse () -> Parser GraphvizState Char -> Parser GraphvizState Char
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser GraphvizState Char
character Char
'#' Parser GraphvizState Char
-> Parser GraphvizState Text -> Parser GraphvizState Text
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser GraphvizState Text
consumeLine Parser GraphvizState Text
-> Parser GraphvizState m -> Parser GraphvizState m
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> m -> Parser GraphvizState m
forall a. a -> Parser GraphvizState a
forall (f :: * -> *) a. Applicative f => a -> f a
pure m
forall a. Monoid a => a
mempty

-- | Parse @//@-style comments.
parseLineComment :: (Monoid m) => Parse m
parseLineComment :: forall m. Monoid m => Parse m
parseLineComment = String -> Parse ()
string String
"//"
                   -- Note: do /not/ consume the newlines, as they're
                   -- needed in case the next line is a pre-processor
                   -- line.
                   Parse () -> Parser GraphvizState Text -> Parser GraphvizState Text
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser GraphvizState Text
consumeLine
                   Parser GraphvizState Text
-> Parser GraphvizState m -> Parser GraphvizState m
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> m -> Parser GraphvizState m
forall a. a -> Parser GraphvizState a
forall (f :: * -> *) a. Applicative f => a -> f a
pure m
forall a. Monoid a => a
mempty

-- | Parse @/* ... */@-style comments.
parseMultiLineComment :: (Monoid m) => Parse m
parseMultiLineComment :: forall m. Monoid m => Parse m
parseMultiLineComment = Parse ()
-> Parse ()
-> Parser GraphvizState [()]
-> Parser GraphvizState [()]
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket Parse ()
start Parse ()
end (Parse () -> Parser GraphvizState [()]
forall a. Parser GraphvizState a -> Parser GraphvizState [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parse ()
inner) Parser GraphvizState [()]
-> Parser GraphvizState m -> Parser GraphvizState m
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> m -> Parser GraphvizState m
forall a. a -> Parser GraphvizState a
forall (f :: * -> *) a. Applicative f => a -> f a
pure m
forall a. Monoid a => a
mempty
  where
    start :: Parse ()
start = String -> Parse ()
string String
"/*"
    end :: Parse ()
end = String -> Parse ()
string String
"*/"
    inner :: Parse ()
inner = ((Char -> Bool) -> Parser GraphvizState Text
forall s. (Char -> Bool) -> Parser s Text
many1Satisfy (Char
'*' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=) Parser GraphvizState Text -> Parse () -> Parse ()
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> () -> Parse ()
forall a. a -> Parser GraphvizState a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())
            Parse () -> Parse () -> Parse ()
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
            (Char -> Parser GraphvizState Char
character Char
'*' Parser GraphvizState Char
-> Parser GraphvizState Char -> Parser GraphvizState Char
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Char -> Bool) -> Parser GraphvizState Char
forall s. (Char -> Bool) -> Parser s Char
satisfy (Char
'/' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=) Parser GraphvizState Char -> Parse () -> Parse ()
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parse ()
inner)

parseConcatStrings :: Parse Builder
parseConcatStrings :: Parse Builder
parseConcatStrings = Builder -> Builder
wrapQuotes (Builder -> Builder)
-> ([Builder] -> Builder) -> [Builder] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> Parser GraphvizState [Builder] -> Parse Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parse Builder
-> Parser GraphvizState [()] -> Parser GraphvizState [Builder]
forall (p :: * -> *) a sep. PolyParse p => p a -> p sep -> p [a]
sepBy1 Parse Builder
parseString Parser GraphvizState [()]
parseConcat
  where
    qParse :: Parser GraphvizState a -> Parser GraphvizState a
qParse = Parser GraphvizState Char
-> Parser GraphvizState Char
-> Parser GraphvizState a
-> Parser GraphvizState a
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (Char -> Parser GraphvizState Char
character Char
'"') (Parser GraphvizState Char -> Parser GraphvizState Char
forall a. Parser GraphvizState a -> Parser GraphvizState a
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (Parser GraphvizState Char -> Parser GraphvizState Char)
-> Parser GraphvizState Char -> Parser GraphvizState Char
forall a b. (a -> b) -> a -> b
$ Char -> Parser GraphvizState Char
character Char
'"')
    parseString :: Parse Builder
parseString = Parse Builder -> Parse Builder
forall a. Parser GraphvizState a -> Parser GraphvizState a
qParse ([Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> Parser GraphvizState [Builder] -> Parse Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parse Builder -> Parser GraphvizState [Builder]
forall a. Parser GraphvizState a -> Parser GraphvizState [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parse Builder
parseInner)
    parseInner :: Parse Builder
parseInner = (String -> Parse ()
string String
"\\\"" Parse () -> Parse Builder -> Parse Builder
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Builder -> Parse Builder
forall a. a -> Parser GraphvizState a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Builder
B.fromLazyText (Text -> Builder) -> Text -> Builder
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
"\\\""))
                 Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
                 -- Need to parse an explicit `\', in case it ends the
                 -- string (and thus the next step would get parsed by the
                 -- previous option).
                 (String -> Parse ()
string String
"\\\\" Parse () -> Parse Builder -> Parse Builder
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Builder -> Parse Builder
forall a. a -> Parser GraphvizState a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Builder
B.fromLazyText (Text -> Builder) -> Text -> Builder
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
"\\\\"))
                 Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
                 Parse Builder
forall m. Monoid m => Parse m
parseSplitLine -- in case there's a split mid-quote
                 Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
                 (Char -> Builder) -> Parser GraphvizState Char -> Parse Builder
forall a b.
(a -> b) -> Parser GraphvizState a -> Parser GraphvizState b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Builder
B.singleton ((Char -> Bool) -> Parser GraphvizState Char
forall s. (Char -> Bool) -> Parser s Char
satisfy (Char
quoteChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=))
    parseConcat :: Parser GraphvizState [()]
parseConcat = Parser GraphvizState [()]
parseSep Parser GraphvizState [()]
-> Parser GraphvizState Char -> Parser GraphvizState Char
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser GraphvizState Char
character Char
'+' Parser GraphvizState Char
-> Parser GraphvizState [()] -> Parser GraphvizState [()]
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser GraphvizState [()]
parseSep
    parseSep :: Parser GraphvizState [()]
parseSep = Parse () -> Parser GraphvizState [()]
forall a. Parser GraphvizState a -> Parser GraphvizState [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parse () -> Parser GraphvizState [()])
-> Parse () -> Parser GraphvizState [()]
forall a b. (a -> b) -> a -> b
$ Parse ()
whitespace1 Parse () -> Parse () -> Parse ()
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail` Parse ()
forall m. Monoid m => Parse m
parseUnwanted
    wrapQuotes :: Builder -> Builder
wrapQuotes Builder
str = Builder
qc Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` Builder
str Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` Builder
qc
    qc :: Builder
qc = Char -> Builder
B.singleton Char
'"'

-- | Lines can be split with a @\\@ at the end of the line.
parseSplitLine :: (Monoid m) => Parse m
parseSplitLine :: forall m. Monoid m => Parse m
parseSplitLine = Char -> Parser GraphvizState Char
character Char
'\\' Parser GraphvizState Char -> Parse () -> Parse ()
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parse ()
newline Parse () -> Parser GraphvizState m -> Parser GraphvizState m
forall a b.
Parser GraphvizState a
-> Parser GraphvizState b -> Parser GraphvizState b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> m -> Parser GraphvizState m
forall a. a -> Parser GraphvizState a
forall (f :: * -> *) a. Applicative f => a -> f a
pure m
forall a. Monoid a => a
mempty

parseHTML :: Parse Builder
parseHTML :: Parse Builder
parseHTML = ([Builder] -> Builder)
-> Parser GraphvizState [Builder] -> Parse Builder
forall a b.
(a -> b) -> Parser GraphvizState a -> Parser GraphvizState b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Builder -> Builder
addAngled (Builder -> Builder)
-> ([Builder] -> Builder) -> [Builder] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat)
            (Parser GraphvizState [Builder] -> Parse Builder)
-> (Parser GraphvizState [Builder]
    -> Parser GraphvizState [Builder])
-> Parser GraphvizState [Builder]
-> Parse Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser GraphvizState [Builder] -> Parser GraphvizState [Builder]
forall a. Parser GraphvizState a -> Parser GraphvizState a
parseAngled (Parser GraphvizState [Builder] -> Parse Builder)
-> Parser GraphvizState [Builder] -> Parse Builder
forall a b. (a -> b) -> a -> b
$ Parse Builder -> Parser GraphvizState [Builder]
forall a. Parser GraphvizState a -> Parser GraphvizState [a]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parse Builder
inner
  where
    inner :: Parse Builder
inner = Parse Builder
parseHTML
            Parse Builder -> Parse Builder -> Parse Builder
forall s a. Parser s a -> Parser s a -> Parser s a
`onFail`
            (Text -> Builder
B.fromLazyText (Text -> Builder) -> Parser GraphvizState Text -> Parse Builder
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser GraphvizState Text
forall s. (Char -> Bool) -> Parser s Text
many1Satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
open Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
close))
    addAngled :: Builder -> Builder
addAngled Builder
str = Char -> Builder
B.singleton Char
open Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` Builder
str Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` Char -> Builder
B.singleton Char
close
    open :: Char
open = Char
'<'
    close :: Char
close = Char
'>'