{-# LANGUAGE GADTs                      #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE ScopedTypeVariables        #-}
{-# LANGUAGE ViewPatterns               #-}

{-# OPTIONS_GHC -Wall -Werror -fno-warn-orphans -fno-warn-unused-imports #-}

module Text.HTML.ParserSpec
where

import Control.Applicative
import Data.Monoid
import Data.List ((\\))
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as B
import           Test.Hspec
import           Test.QuickCheck
import           Test.QuickCheck.Instances ()
import           Text.HTML.Parser


instance Arbitrary Token where
  arbitrary = oneof [validOpen, validClose, validFlat]

  shrink (TagOpen n as)      = TagOpen n <$> shrink as
  shrink (TagSelfClose n as) = TagSelfClose n <$> shrink as
  shrink (TagClose _)        = []
  shrink (ContentText _)     = []
  shrink (ContentChar _)     = []
  shrink (Comment b)         = Comment . B.fromText <$> (shrink . TL.toStrict . B.toLazyText $ b)
  shrink (Doctype t)         = Doctype <$> shrink t

instance Arbitrary Attr where
  arbitrary = Attr <$> validXmlAttrName <*> validXmlAttrValue
  shrink (Attr k v) = Attr <$> shrink k <*> shrink v

validOpen :: Gen Token
validOpen = TagOpen <$> validXmlTagName <*> arbitrary

validClose :: Gen Token
validClose = TagClose <$> validXmlTagName

validFlat :: Gen Token
validFlat = oneof
    [ TagSelfClose <$> validXmlTagName <*> arbitrary
    , ContentChar <$> validXmlChar
    , ContentText <$> validXmlText
    , Comment . B.fromText <$> validXmlCommentText
    , Doctype <$> validXmlText
    ]

-- FIXME: sometimes it is allowed to use '<' as text token, and we don't test that yet.  (whether we
-- like this choice or not, we may want to follow the standard here.)  (same in tag names, attr
-- names.)
validXmlChar :: Gen Char
validXmlChar = elements (['\x20'..'\x7E'] \\ "\x09\x0a\x0c /<>")

validXmlText :: Gen T.Text
validXmlText = T.pack <$> sized (`maxListOf` validXmlChar)

validXmlTagName :: Gen T.Text
validXmlTagName = do
    initchar  <- elements $ ['a'..'z'] <> ['A'..'Z']
    thenchars <- sized (`maxListOf` elements (['\x20'..'\x7E'] \\ "\x09\x0a\x0c /<>"))
    pure . T.pack $ initchar : thenchars

validXmlAttrName :: Gen T.Text
validXmlAttrName = do
    initchar  <- elements $ ['a'..'z'] <> ['A'..'Z']
    thenchars <- sized (`maxListOf` elements (['\x20'..'\x7E'] \\ "\x09\x0a\x0c /=<>\x00"))
    pure . T.pack $ initchar : thenchars

-- FIXME: not sure if @Attr "key" "\""@ should be parseable, but it's not, so we don't test it.
validXmlAttrValue :: Gen T.Text
validXmlAttrValue = do
    T.pack <$> sized (`maxListOf` elements (['\x20'..'\x7E'] \\ "\x09\x0a\x0c /=<>\x00\""))

-- FIXME: i think this should be 'validXmlChar', but that will fail the test suite.
validXmlCommentText :: Gen T.Text
validXmlCommentText = do
    T.pack <$> sized (`maxListOf` elements (['\x20'..'\x7E'] \\ "\x09\x0a\x0c /=<>\x00\"-"))

maxListOf :: Int -> Gen a -> Gen [a]
maxListOf n g = take n <$> listOf g


spec :: Spec
spec = do
  it "parseTokens and renderTokens are inverse" . property . forAllShrink arbitrary shrink $
    \(canonicalizeTokens -> tokens)
      -> (parseTokens . TL.toStrict . renderTokens $ tokens) `shouldBe` tokens

  it "canonicalizeTokens is idempotent" . property . forAllShrink arbitrary shrink $
    \tokens
      -> canonicalizeTokens tokens `shouldBe` canonicalizeTokens (canonicalizeTokens tokens)

  describe "regression tests" $ do
    describe "parseTokens" $ do
      it "works on `<h1>Heading</h1>`" $ do
        parseTokens "<h1>Heading</h1>" `shouldBe` [TagOpen "h1" [], ContentText "Heading", TagClose "h1"]