{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE NamedFieldPuns #-}
module Xml
( Node (..)
, Content (..)
, Attribute (..)
, decode
) where
import Data.Builder.ST (Builder)
import Data.Bytes (Bytes)
import Data.Bytes.Parser (Parser)
import Data.Chunks (Chunks)
import Data.Primitive (SmallArray)
import Data.Text.Short (ShortText)
import Data.Word (Word8)
import GHC.Exts (Char (C#), Char#)
import qualified Data.Builder.ST as Builder
import qualified Data.Bytes as Bytes
import qualified Data.Bytes.Parser as Parser
import qualified Data.Bytes.Parser.Latin as Latin
import qualified Data.Bytes.Parser.Rebindable as R
import qualified Data.Bytes.Parser.Unsafe as Unsafe
import qualified Data.Bytes.Parser.Utf8 as Utf8
import qualified Data.Chunks as Chunks
import qualified Data.Text.Short as TS
import qualified Data.Text.Short.Unsafe as TS
data Node
= Text !ShortText
| Element {-# UNPACK #-} !Content
deriving (Int -> Node -> ShowS
[Node] -> ShowS
Node -> String
(Int -> Node -> ShowS)
-> (Node -> String) -> ([Node] -> ShowS) -> Show Node
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Node -> ShowS
showsPrec :: Int -> Node -> ShowS
$cshow :: Node -> String
show :: Node -> String
$cshowList :: [Node] -> ShowS
showList :: [Node] -> ShowS
Show, Node -> Node -> Bool
(Node -> Node -> Bool) -> (Node -> Node -> Bool) -> Eq Node
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Node -> Node -> Bool
== :: Node -> Node -> Bool
$c/= :: Node -> Node -> Bool
/= :: Node -> Node -> Bool
Eq)
data Content = Content
{ Content -> ShortText
tag :: !ShortText
, Content -> SmallArray Attribute
attributes :: !(SmallArray Attribute)
, Content -> SmallArray Node
children :: !(SmallArray Node)
}
deriving (Int -> Content -> ShowS
[Content] -> ShowS
Content -> String
(Int -> Content -> ShowS)
-> (Content -> String) -> ([Content] -> ShowS) -> Show Content
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Content -> ShowS
showsPrec :: Int -> Content -> ShowS
$cshow :: Content -> String
show :: Content -> String
$cshowList :: [Content] -> ShowS
showList :: [Content] -> ShowS
Show, Content -> Content -> Bool
(Content -> Content -> Bool)
-> (Content -> Content -> Bool) -> Eq Content
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Content -> Content -> Bool
== :: Content -> Content -> Bool
$c/= :: Content -> Content -> Bool
/= :: Content -> Content -> Bool
Eq)
data Attribute = Attribute
{ Attribute -> ShortText
name :: !ShortText
, Attribute -> ShortText
value :: !ShortText
}
deriving (Int -> Attribute -> ShowS
[Attribute] -> ShowS
Attribute -> String
(Int -> Attribute -> ShowS)
-> (Attribute -> String)
-> ([Attribute] -> ShowS)
-> Show Attribute
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Attribute -> ShowS
showsPrec :: Int -> Attribute -> ShowS
$cshow :: Attribute -> String
show :: Attribute -> String
$cshowList :: [Attribute] -> ShowS
showList :: [Attribute] -> ShowS
Show, Attribute -> Attribute -> Bool
(Attribute -> Attribute -> Bool)
-> (Attribute -> Attribute -> Bool) -> Eq Attribute
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Attribute -> Attribute -> Bool
== :: Attribute -> Attribute -> Bool
$c/= :: Attribute -> Attribute -> Bool
/= :: Attribute -> Attribute -> Bool
Eq)
decode :: Bytes -> Maybe Node
decode :: Bytes -> Maybe Node
decode !Bytes
b = (forall s. Parser () s Node) -> Bytes -> Maybe Node
forall e a. (forall s. Parser e s a) -> Bytes -> Maybe a
Parser.parseBytesMaybe Parser () s Node
forall s. Parser () s Node
elementNodeParser Bytes
b
elementNodeParser :: Parser () s Node
elementNodeParser :: forall s. Parser () s Node
elementNodeParser = do
() -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'<'
Bytes
btag <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Bool -> Bool
not (Word8 -> Bool
isXmlSpace Word8
w) Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x3E Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x2F)
case Bytes -> Int
Bytes.length Bytes
btag of
Int
0 -> () -> Parser () s ()
forall e s a. e -> Parser e s a
Parser.fail ()
Int
_ -> () -> Parser () s ()
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
ShortText
tag <- case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
btag) of
Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
Just ShortText
ttag -> ShortText -> Parser () s ShortText
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
ttag
Chunks Attribute
attrs <- Builder s Attribute -> Parser () s (Chunks Attribute)
forall s. Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes (Builder s Attribute -> Parser () s (Chunks Attribute))
-> Parser () s (Builder s Attribute)
-> Parser () s (Chunks Attribute)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< ST s (Builder s Attribute) -> Parser () s (Builder s Attribute)
forall s a e. ST s a -> Parser e s a
Parser.effect ST s (Builder s Attribute)
forall s a. ST s (Builder s a)
Builder.new
let !attributes :: SmallArray Attribute
attributes = Chunks Attribute -> SmallArray Attribute
forall a. Chunks a -> SmallArray a
Chunks.concat Chunks Attribute
attrs
() -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char -> (Char -> Parser () s Node) -> Parser () s Node
forall a b. Parser () s a -> (a -> Parser () s b) -> Parser () s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Char
'>' -> do
Chunks Node
nodes <- ShortText -> Parser () s (Chunks Node)
forall s. ShortText -> Parser () s (Chunks Node)
childrenParser ShortText
tag
Node -> Parser () s Node
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Content -> Node
Element Content {ShortText
tag :: ShortText
tag :: ShortText
tag, SmallArray Attribute
attributes :: SmallArray Attribute
attributes :: SmallArray Attribute
attributes, children :: SmallArray Node
children = Chunks Node -> SmallArray Node
forall a. Chunks a -> SmallArray a
Chunks.concat Chunks Node
nodes})
Char
'/' -> do
() -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'>'
Node -> Parser () s Node
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Content -> Node
Element Content {ShortText
tag :: ShortText
tag :: ShortText
tag, SmallArray Attribute
attributes :: SmallArray Attribute
attributes :: SmallArray Attribute
attributes, children :: SmallArray Node
children = SmallArray Node
forall a. Monoid a => a
mempty})
Char
_ -> () -> Parser () s Node
forall e s a. e -> Parser e s a
Parser.fail ()
textNodeParser :: Parser () s Node
textNodeParser :: forall s. Parser () s Node
textNodeParser = do
Bytes
raw <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x3C)
case (Word8 -> Bool) -> Bytes -> Bool
Bytes.any (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
0x7F Bool -> Bool -> Bool
|| Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x26) Bytes
raw of
Bool
True -> () -> Parser () s Node
forall e s a. e -> Parser e s a
Parser.fail ()
Bool
False -> Node -> Parser () s Node
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortText -> Node
Text (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
raw)))
childrenParser ::
ShortText ->
Parser () s (Chunks Node)
childrenParser :: forall s. ShortText -> Parser () s (Chunks Node)
childrenParser !ShortText
tag = do
Builder s Node
b0 <- ST s (Builder s Node) -> Parser () s (Builder s Node)
forall s a e. ST s a -> Parser e s a
Parser.effect ST s (Builder s Node)
forall s a. ST s (Builder s a)
Builder.new
ShortText -> Builder s Node -> Parser () s (Chunks Node)
forall s. ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop ShortText
tag Builder s Node
b0
childrenParserLoop ::
ShortText ->
Builder s Node ->
Parser () s (Chunks Node)
childrenParserLoop :: forall s. ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop !ShortText
tag !Builder s Node
b0 =
() -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char
-> (Char -> Parser () s (Chunks Node)) -> Parser () s (Chunks Node)
forall a b. Parser () s a -> (a -> Parser () s b) -> Parser () s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Char
'<' ->
() -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char
-> (Char -> Parser () s (Chunks Node)) -> Parser () s (Chunks Node)
forall a b. Parser () s a -> (a -> Parser () s b) -> Parser () s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Char
'/' -> do
() -> ShortText -> Parser () s ()
forall e s. e -> ShortText -> Parser e s ()
Utf8.shortText () ShortText
tag
(Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
() -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'>'
ST s (Chunks Node) -> Parser () s (Chunks Node)
forall s a e. ST s a -> Parser e s a
Parser.effect (Builder s Node -> ST s (Chunks Node)
forall s a. Builder s a -> ST s (Chunks a)
Builder.freeze Builder s Node
b0)
Char
_ -> do
Int -> Parser () s ()
forall e s. Int -> Parser e s ()
Unsafe.unconsume Int
2
Node
node <- Parser () s Node
forall s. Parser () s Node
elementNodeParser
Builder s Node
b1 <- ST s (Builder s Node) -> Parser () s (Builder s Node)
forall s a e. ST s a -> Parser e s a
Parser.effect (Node -> Builder s Node -> ST s (Builder s Node)
forall a s. a -> Builder s a -> ST s (Builder s a)
Builder.push Node
node Builder s Node
b0)
ShortText -> Builder s Node -> Parser () s (Chunks Node)
forall s. ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop ShortText
tag Builder s Node
b1
Char
_ -> do
Int -> Parser () s ()
forall e s. Int -> Parser e s ()
Unsafe.unconsume Int
1
Node
node <- Parser () s Node
forall s. Parser () s Node
textNodeParser
Builder s Node
b1 <- ST s (Builder s Node) -> Parser () s (Builder s Node)
forall s a e. ST s a -> Parser e s a
Parser.effect (Node -> Builder s Node -> ST s (Builder s Node)
forall a s. a -> Builder s a -> ST s (Builder s a)
Builder.push Node
node Builder s Node
b0)
ShortText -> Builder s Node -> Parser () s (Chunks Node)
forall s. ShortText -> Builder s Node -> Parser () s (Chunks Node)
childrenParserLoop ShortText
tag Builder s Node
b1
isXmlSpace :: Word8 -> Bool
isXmlSpace :: Word8 -> Bool
isXmlSpace = \case
Word8
0x20 -> Bool
True
Word8
0x09 -> Bool
True
Word8
0x0D -> Bool
True
Word8
0x0A -> Bool
True
Word8
_ -> Bool
False
parserAttributes :: Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes :: forall s. Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes !Builder s Attribute
b0 = do
(Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
Parser () s Bool
forall s. Parser () s Bool
peekIsNameStartChar Parser () s Bool
-> (Bool -> Parser () s (Chunks Attribute))
-> Parser () s (Chunks Attribute)
forall a b. Parser () s a -> (a -> Parser () s b) -> Parser () s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Bool
True -> do
Attribute
attr <- Parser () s Attribute
forall s. Parser () s Attribute
parserAttribute
Builder s Attribute
b1 <- ST s (Builder s Attribute) -> Parser () s (Builder s Attribute)
forall s a e. ST s a -> Parser e s a
Parser.effect (Attribute -> Builder s Attribute -> ST s (Builder s Attribute)
forall a s. a -> Builder s a -> ST s (Builder s a)
Builder.push Attribute
attr Builder s Attribute
b0)
Builder s Attribute -> Parser () s (Chunks Attribute)
forall s. Builder s Attribute -> Parser () s (Chunks Attribute)
parserAttributes Builder s Attribute
b1
Bool
False -> do
(Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
ST s (Chunks Attribute) -> Parser () s (Chunks Attribute)
forall s a e. ST s a -> Parser e s a
Parser.effect (Builder s Attribute -> ST s (Chunks Attribute)
forall s a. Builder s a -> ST s (Chunks a)
Builder.freeze Builder s Attribute
b0)
parserAttribute :: Parser () s Attribute
parserAttribute :: forall s. Parser () s Attribute
parserAttribute = do
Bytes
bname <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Bool -> Bool
not (Word8 -> Bool
isXmlSpace Word8
w) Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x3D)
!ShortText
name <- case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
bname) of
Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
Just ShortText
tname -> ShortText -> Parser () s ShortText
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
tname
(Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
() -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'='
(Word8 -> Bool) -> Parser () s ()
forall e s. (Word8 -> Bool) -> Parser e s ()
Parser.skipWhile Word8 -> Bool
isXmlSpace
!ShortText
value <- Parser () s ShortText
forall s. Parser () s ShortText
parserAttributeValue
Attribute -> Parser () s Attribute
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Attribute {ShortText
name :: ShortText
name :: ShortText
name, ShortText
value :: ShortText
value :: ShortText
value}
parserAttributeValue :: Parser () s ShortText
parserAttributeValue :: forall s. Parser () s ShortText
parserAttributeValue = do
() -> Parser () s Char
forall e s. e -> Parser e s Char
Latin.any () Parser () s Char
-> (Char -> Parser () s ShortText) -> Parser () s ShortText
forall a b. Parser () s a -> (a -> Parser () s b) -> Parser () s b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Char
'"' -> do
Bytes
bval <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x22)
() -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'"'
case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
bval) of
Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
Just ShortText
tval -> ShortText -> Parser () s ShortText
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
tval
Char
'\'' -> do
Bytes
bval <- (Word8 -> Bool) -> Parser () s Bytes
forall e s. (Word8 -> Bool) -> Parser e s Bytes
Parser.takeWhile (\Word8
w -> Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x27)
() -> Char -> Parser () s ()
forall e s. e -> Char -> Parser e s ()
Latin.char () Char
'\''
case ShortByteString -> Maybe ShortText
TS.fromShortByteString (Bytes -> ShortByteString
Bytes.toShortByteStringClone Bytes
bval) of
Maybe ShortText
Nothing -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
Just ShortText
tval -> ShortText -> Parser () s ShortText
forall a. a -> Parser () s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ShortText
tval
Char
_ -> () -> Parser () s ShortText
forall e s a. e -> Parser e s a
Parser.fail ()
peekIsNameStartChar :: Parser () s Bool
peekIsNameStartChar :: forall s. Parser () s Bool
peekIsNameStartChar =
Parser () s Int
forall e s. Parser e s Int
Unsafe.cursor Parser () s Int -> (Int -> Parser () s Bool) -> Parser () s Bool
forall e s a b. Parser e s a -> (a -> Parser e s b) -> Parser e s b
forall e s a b.
Bind LiftedRep LiftedRep =>
Parser e s a -> (a -> Parser e s b) -> Parser e s b
R.>>= \Int
pos ->
() -> Parser () s Char#
forall e s. e -> Parser e s Char#
Utf8.any# () Parser () s Char#
-> (Char# -> Parser () s Bool) -> Parser () s Bool
forall e s (a :: TYPE 'WordRep) b.
Parser e s a -> (a -> Parser e s b) -> Parser e s b
forall e s a b.
Bind LiftedRep LiftedRep =>
Parser e s a -> (a -> Parser e s b) -> Parser e s b
R.>>= \Char#
c ->
Int -> Parser () s ()
forall e s. Int -> Parser e s ()
Unsafe.jump Int
pos Parser () s () -> (() -> Parser () s Bool) -> Parser () s Bool
forall e s a b. Parser e s a -> (a -> Parser e s b) -> Parser e s b
forall e s a b.
Bind LiftedRep LiftedRep =>
Parser e s a -> (a -> Parser e s b) -> Parser e s b
R.>>= \()
_ ->
Bool -> Parser () s Bool
forall e s a. a -> Parser e s a
forall e s a. Pure LiftedRep => a -> Parser e s a
R.pure (Char# -> Bool
isNameStartChar Char#
c)
isNameStartChar :: Char# -> Bool
isNameStartChar :: Char# -> Bool
isNameStartChar Char#
c = case Char# -> Char
C# Char#
c of
Char
':' -> Bool
True
Char
'_' -> Bool
True
Char
_ | Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'A' Bool -> Bool -> Bool
&& Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z' -> Bool
True
Char
_ | Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'a' Bool -> Bool -> Bool
&& Char# -> Char
C# Char#
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z' -> Bool
True
Char
_ -> Bool
False