{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}

module Text.RDF.RDF4H.XmlParser.Identifiers
  ( -- rdf:ID validation
    checkRdfId,
    -- Qualified names
    resolveQName,
    resolveQName',
    parseQName,
  )
where

#if !MIN_VERSION_base(4,13,0)
import           Data.Functor ((<$))
#else
#endif
import Control.Applicative (Alternative (..))
import Data.Attoparsec.Text (Parser, (<?>))
import qualified Data.Attoparsec.Text as P
import qualified Data.Map as Map
import Data.Text (Text)
import qualified Data.Text as T
#if MIN_VERSION_base(4,9,0)
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup ((<>))
#else
#endif
#else
#endif
import Data.Char (isAsciiLower, isAsciiUpper, isDigit)
import Data.RDF.Namespace

--------------------------------------------------------------------------------
-- rdf:ID

-- | Validate the value of @rdf:ID@.
--
--  See: https://www.w3.org/TR/rdf-syntax-grammar/#rdf-id
checkRdfId ::
  -- | Value of a @rdf:ID@ attribute to validate.
  Text ->
  Either String Text
checkRdfId :: Text -> Either String Text
checkRdfId Text
t = Text
t Text -> Either String Text -> Either String Text
forall a b. a -> Either String b -> Either String a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Text -> Either String Text
parseId Text
t

parseId :: Text -> Either String Text
parseId :: Text -> Either String Text
parseId = Parser Text -> Text -> Either String Text
forall a. Parser a -> Text -> Either String a
P.parseOnly (Parser Text -> Text -> Either String Text)
-> Parser Text -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Parser Text
pNCName Parser Text -> Parser Text () -> Parser Text
forall a b. Parser Text a -> Parser Text b -> Parser Text a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser Text ()
forall t. Chunk t => Parser t ()
P.endOfInput Parser Text () -> String -> Parser Text ()
forall i a. Parser i a -> String -> Parser i a
<?> String
"Unexpected characters at the end")

--------------------------------------------------------------------------------
-- Qualified names

-- | Parse and resolve a qualified name.
--
--  See: https://www.w3.org/TR/xml-names/#ns-qualnames
resolveQName ::
  -- | Namespace mapping to resolve q qualified name.
  PrefixMappings ->
  -- | Raw qualified name to process.
  Text ->
  Either String Text
resolveQName :: PrefixMappings -> Text -> Either String Text
resolveQName PrefixMappings
pm Text
qn = Text -> Either String (Maybe Text, Text)
parseQName Text
qn Either String (Maybe Text, Text)
-> ((Maybe Text, Text) -> Either String Text) -> Either String Text
forall a b.
Either String a -> (a -> Either String b) -> Either String b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= PrefixMappings -> (Maybe Text, Text) -> Either String Text
resolveQName' PrefixMappings
pm

-- | Resolve a qualified name.
resolveQName' ::
  -- | Namespace mapping to resolve q qualified name.
  PrefixMappings ->
  -- | (namespace, local name)
  (Maybe Text, Text) ->
  Either String Text
resolveQName' :: PrefixMappings -> (Maybe Text, Text) -> Either String Text
resolveQName' (PrefixMappings Map Text Text
pm) (Maybe Text
Nothing, Text
name) =
  case Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
forall a. Monoid a => a
mempty Map Text Text
pm of
    Maybe Text
Nothing -> String -> Either String Text
forall a b. a -> Either a b
Left (String -> Either String Text) -> String -> Either String Text
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall a. Monoid a => [a] -> a
mconcat [String
"Cannot resolve QName \"", Text -> String
T.unpack Text
name, String
"\": no default namespace defined."]
    Just Text
iri -> Text -> Either String Text
forall a b. b -> Either a b
Right (Text -> Either String Text) -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Text
iri Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
name
resolveQName' (PrefixMappings Map Text Text
pm) (Just Text
prefix, Text
name) =
  case Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
prefix Map Text Text
pm of
    Maybe Text
Nothing -> String -> Either String Text
forall a b. a -> Either a b
Left (String -> Either String Text) -> String -> Either String Text
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall a. Monoid a => [a] -> a
mconcat [String
"Cannot resolve QName: prefix \"", Text -> String
T.unpack Text
prefix, String
"\" not defined"]
    Just Text
iri -> Text -> Either String Text
forall a b. b -> Either a b
Right (Text -> Either String Text) -> Text -> Either String Text
forall a b. (a -> b) -> a -> b
$ Text
iri Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
name

-- | Parse a qualified name.
--
--  See: https://www.w3.org/TR/xml-names/#ns-qualnames
parseQName :: Text -> Either String (Maybe Text, Text)
parseQName :: Text -> Either String (Maybe Text, Text)
parseQName = Parser (Maybe Text, Text)
-> Text -> Either String (Maybe Text, Text)
forall a. Parser a -> Text -> Either String a
P.parseOnly (Parser (Maybe Text, Text)
 -> Text -> Either String (Maybe Text, Text))
-> Parser (Maybe Text, Text)
-> Text
-> Either String (Maybe Text, Text)
forall a b. (a -> b) -> a -> b
$ Parser (Maybe Text, Text)
pQName Parser (Maybe Text, Text)
-> Parser Text () -> Parser (Maybe Text, Text)
forall a b. Parser Text a -> Parser Text b -> Parser Text a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser Text ()
forall t. Chunk t => Parser t ()
P.endOfInput Parser Text () -> String -> Parser Text ()
forall i a. Parser i a -> String -> Parser i a
<?> String
"Unexpected characters at the end of a QName")

-- https://www.w3.org/TR/xml-names/#ns-qualnames
-- https://www.w3.org/TR/xml-names/#NT-QName
pQName :: Parser (Maybe Text, Text)
pQName :: Parser (Maybe Text, Text)
pQName = Parser (Maybe Text, Text)
pPrefixedName Parser (Maybe Text, Text)
-> Parser (Maybe Text, Text) -> Parser (Maybe Text, Text)
forall a. Parser Text a -> Parser Text a -> Parser Text a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Maybe Text, Text)
forall {a}. Parser Text (Maybe a, Text)
pUnprefixedNamed
  where
    pUnprefixedNamed :: Parser Text (Maybe a, Text)
pUnprefixedNamed = (Maybe a
forall a. Maybe a
forall (f :: * -> *) a. Alternative f => f a
empty,) (Text -> (Maybe a, Text))
-> Parser Text -> Parser Text (Maybe a, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
pLocalPart

-- https://www.w3.org/TR/xml-names/#NT-PrefixedName
pPrefixedName :: Parser (Maybe Text, Text)
pPrefixedName :: Parser (Maybe Text, Text)
pPrefixedName = do
  Text
prefix <- Parser Text
pLocalPart Parser Text -> Parser Text Char -> Parser Text
forall a b. Parser Text a -> Parser Text b -> Parser Text a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
P.char Char
':'
  Text
localPart <- Parser Text
pLocalPart
  (Maybe Text, Text) -> Parser (Maybe Text, Text)
forall a. a -> Parser Text a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
prefix, Text
localPart)

-- https://www.w3.org/TR/xml-names/#NT-LocalPart
pLocalPart :: Parser Text
pLocalPart :: Parser Text
pLocalPart = Parser Text
pNCName

-- http://www.w3.org/TR/REC-xml-names/#NT-NCName
pNCName :: Parser Text
pNCName :: Parser Text
pNCName = (Char -> Text -> Text)
-> Parser Text Char -> Parser Text -> Parser Text
forall a b c.
(a -> b -> c) -> Parser Text a -> Parser Text b -> Parser Text c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Char -> Text -> Text
T.cons Parser Text Char
pNameStartChar Parser Text
pNameRest
  where
    pNameStartChar :: Parser Text Char
pNameStartChar = (Char -> Bool) -> Parser Text Char
P.satisfy Char -> Bool
isValidFirstCharId
    pNameRest :: Parser Text
pNameRest = (Char -> Bool) -> Parser Text
P.takeWhile Char -> Bool
isValidRestCharId
    isValidFirstCharId :: Char -> Bool
isValidFirstCharId Char
c =
      Char -> Bool
isAsciiUpper Char
c
        Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_'
        Bool -> Bool -> Bool
|| Char -> Bool
isAsciiLower Char
c
        Bool -> Bool -> Bool
|| (Char
'\xC0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD6')
        Bool -> Bool -> Bool
|| (Char
'\xD8' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xF6')
        Bool -> Bool -> Bool
|| (Char
'\xF8' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF')
        Bool -> Bool -> Bool
|| (Char
'\x370' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x37D')
        Bool -> Bool -> Bool
|| (Char
'\x37F' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF')
        Bool -> Bool -> Bool
|| (Char
'\x200C' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200D')
        Bool -> Bool -> Bool
|| (Char
'\x2070' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218F')
        Bool -> Bool -> Bool
|| (Char
'\x2C00' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF')
        Bool -> Bool -> Bool
|| (Char
'\x3001' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF')
        Bool -> Bool -> Bool
|| (Char
'\xF900' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF')
        Bool -> Bool -> Bool
|| (Char
'\xFDF0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD')
        Bool -> Bool -> Bool
|| (Char
'\x10000' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')
    isValidRestCharId :: Char -> Bool
isValidRestCharId Char
c =
      Char -> Bool
isValidFirstCharId Char
c
        Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-'
        Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.'
        Bool -> Bool -> Bool
|| Char -> Bool
isDigit Char
c
        Bool -> Bool -> Bool
|| (Char
'\x0300' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x036F')
        Bool -> Bool -> Bool
|| (Char
'\x203F' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')