module Text.SXML
  ( 
  readFile
  
  ,parseString
  
  ,parseText
  ,parseLazyText
  
  ,writeFile
  
  ,renderString
  
  ,renderText
  ,renderLazyText
  ) where
import Data.Monoid
import Data.Maybe
import Data.List
import Text.SXML.Internal
import Text.ParserCombinators.Poly.State
import Prelude hiding (readFile, writeFile)
import Control.Applicative hiding ((<|>),many)
import qualified System.IO as IO
import qualified Data.XML.Types as XT
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.IO as TIO
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text as TS
import qualified Data.Map as M
type SXMLParser a = Parser [M.Map TS.Text (TS.Text, Maybe TS.Text)] Token a 
                                                                            
                                                                            
readFile :: FilePath -> IO (Either String XT.Document)
readFile file = do
  sxml <- IO.readFile file
  return $ parseString sxml
parseText :: TS.Text -> Either String XT.Document
parseText = parseString . TS.unpack
parseLazyText :: T.Text -> Either String XT.Document
parseLazyText = parseString . T.unpack
parseString :: String -> Either String XT.Document
parseString txt = f3 $ runParser getSXML [M.empty] $ lexer txt
  where f3 (x,_,_) = x
writeFile :: FilePath -> XT.Document -> IO ()
writeFile file doc = TIO.writeFile file (renderLazyText doc)
renderText :: XT.Document -> TS.Text
renderText = T.toStrict . renderLazyText
renderString :: XT.Document -> String
renderString = T.unpack . renderLazyText
renderLazyText :: XT.Document -> T.Text
renderLazyText = TB.toLazyText . builderDoc
  where tagend = TB.singleton ')'
        tagstart = TB.singleton '('
        space = TB.singleton ' '
        empty = TB.fromString ""
        builderString txt =
          let toSchemeStringBuilder txt = fold_mod ((<>) . (<> TB.fromString "\\\\")) splitDoubleQuotes (TS.split (=='\\') txt)
              splitDoubleQuotes txt = fold_mod ((<>) . (<> TB.fromString "\\\"")) splitCarriageReturns (TS.split (=='"') txt)
              splitCarriageReturns txt = fold_mod ((<>) . (<> TB.fromString "\\r")) splitLineFeeds (TS.split (=='\r') txt)
              splitLineFeeds txt = fold_mod ((<>) . (<> TB.fromString "\\n")) splitFormFeeds (TS.split (=='\n') txt)
              splitFormFeeds txt = fold_mod ((<>) . (<> TB.fromString "\\f")) splitBackspaces (TS.split (=='\f') txt)
              splitBackspaces txt = fold_mod ((<>) . (<> TB.fromString "\\b")) splitTabulations (TS.split (=='\b') txt)
              splitTabulations txt = fold_mod ((<>) . (<> TB.fromString "\\t")) splitVerticalTab (TS.split (=='\t') txt)
              splitVerticalTab txt = fold_mod ((<>) . (<> TB.fromString "\\v")) TB.fromText (TS.split (=='\v') txt)
              fold_mod f t l = foldr1 f (map t l)
              stringsep = TB.singleton '"'
          in stringsep <> toSchemeStringBuilder txt <> stringsep
        nameToBuilder n = maybe empty ((<> TB.singleton ':') . TB.fromText) (XT.nameNamespace n) <> (TB.fromText . XT.nameLocalName $ n)
        builderDoc doc = TB.fromString "(*TOP*" <> (builderPrologue . sortBy prologueSorter $ ((XT.prologueBefore . XT.documentPrologue $ doc) ++ (XT.prologueAfter . XT.documentPrologue $ doc) ++ XT.documentEpilogue doc)) <> (builderElem . XT.documentRoot $ doc) <> tagend
        builderElem el = tagstart <> (nameToBuilder . XT.elementName $ el) <> space <> (builderAttrList . XT.elementAttributes $ el) <> (builderChildren . XT.elementNodes $ el) <> tagend
        builderContent (XT.ContentText t) = builderString t
        builderContent (XT.ContentEntity e) = TB.fromString "(*ENTITY* " <> TB.fromText e <> TB.fromString " \"\"" <> tagend
        builderInstruction (XT.Instruction tgt dat) = TB.fromString "(*PI* " <> TB.fromText tgt <> space <> builderString dat <> tagend
        builderComment ct = TB.fromString "(*COMMENT* " <> builderString ct <> tagend
        builderAttrList [] = empty
        builderAttrList al =
          let helperAttr [] = tagend
              helperAttr ((n,cs):ats) = tagstart <> nameToBuilder n <> helperAttrVal cs <> helperAttr ats
              helperAttrVal = foldr ((<>) . (space <>) . builderContent) tagend
          in TB.fromString "(@ " <> helperAttr al
        builderChildren [] = empty
        builderChildren (c:cs) =
          let helperChild (XT.NodeComment cmt) = builderComment cmt
              helperChild (XT.NodeInstruction instr) = builderInstruction instr
              helperChild (XT.NodeContent cnt) = builderContent cnt
              helperChild (XT.NodeElement elt) = builderElem elt
          in space <> helperChild c <> builderChildren cs
        builderPrologue [] = space
        builderPrologue (e:es) =
          let helperPrologue (XT.MiscInstruction instr) = builderInstruction instr
              helperPrologue (XT.MiscComment cmt) = builderComment cmt
          in space <> helperPrologue e <> builderPrologue es
        prologueSorter (XT.MiscInstruction _) (XT.MiscComment _) = GT
        prologueSorter (XT.MiscComment _) (XT.MiscInstruction _) = LT
        prologueSorter _ _ = EQ
getSXML :: SXMLParser XT.Document
getSXML = do
  lparen
  root
  pr <- prologue
  rEl <- element
  rparen
  return $ XT.Document pr rEl []
prologue :: SXMLParser XT.Prologue
prologue = do
  optional annotations
  pis <- many prologueProcInstr
  comments <- many prologueComment
  return $ XT.Prologue (pis ++ comments) Nothing []
prologueComment = do
  c <- commentHelper
  return $ XT.MiscComment c
comment = do
  c <- commentHelper
  return $ XT.NodeComment c
commentHelper = parens (cmt >> strTok)
prologueProcInstr = do
  inst <- prologueHelper
  return $ XT.MiscInstruction inst
procInstr = do
  inst <- prologueHelper
  return $ XT.NodeInstruction inst
prologueHelper = parens $ do
  pinst
  target <- nodeTok
  optional annotations
  instr <- strTok
  return $ XT.Instruction target instr
annotations = do
  lparen
  atl
  namespaces
  many annotation
  rparen
  return ()
annotation = do
  lparen
  name <- nodeTok
  target <- nodeTok <|> strTok
  rparen
  return ()
namespaces = do
  lparen
  nsl
  many namespace
  rparen
  return ()
namespace = do
  lparen
  nid <- nodeTok
  uri <- strTok
  oid <- optional nodeTok
  rparen
  stUpdate (\(cns:stns) -> (M.insert nid (uri, oid) cns : stns))
  return ()
helperName :: TS.Text -> SXMLParser XT.Name
helperName qname =
  let (prefv, uqnamev) = TS.breakOnEnd sep qname
      (pref, uqname) = if TS.null prefv then
                         (TS.empty, uqnamev)
                       else
                         (TS.init prefv, uqnamev)
  in do
       nss <- stGet
       if TS.null pref then
         return $ XT.Name uqname Nothing Nothing
       else
         let lval = lookupS pref nss
         in case lval of
              Nothing -> return $ XT.Name uqname (Just pref) Nothing
              Just (uri, _) -> return $ XT.Name uqname (Just uri) (Just pref)
element :: SXMLParser XT.Element
element = do
  lparen
  eName <- nodeTok
  stUpdate (M.empty:)
  attrs <- optional annotAttributes
  let attrs' = fromMaybe [] attrs
  children <- many childOfElement
  rparen
  ename <- helperName eName
  stUpdate tail
  return $ XT.Element ename attrs' children
childOfElement = oneOf [elemText, comment, procInstr, entity, elementChild]
elementChild = do
  el <- element
  return $ XT.NodeElement el
attribute = do
  lparen
  name <- nodeTok
  val <- optional strTok
  optional annotations
  rparen
  atname <- helperName name
  case val of
    Nothing -> return (atname, [XT.ContentText $ XT.nameLocalName atname])
    Just v -> return (atname, [XT.ContentText v])
annotAttributes = do
  lparen
  atl
  attrs <- many attribute
  optional annotations
  rparen
  return attrs
entity = do
  lparen
  ent
  pid <- strTok
  sid <- strTok
  rparen
  return $ XT.NodeContent $ XT.ContentEntity pid
elemText = do
  val <- strTok
  return $ XT.NodeContent $ XT.ContentText val
root =
  satisfy' cdr "DocRoot"
  where cdr tok = case tok of
                    DocRoot -> True
                    _ -> False
cmt =
  satisfy' cct "Comment"
  where cct tok = case tok of
                    Comment -> True
                    _ -> False
ent =
  satisfy' cet "Entity"
  where cet tok = case tok of
                    Entity -> True
                    _ -> False
nsl =
  satisfy' cns "NamespacesList"
  where cns tok = case tok of
                    NamespacesList -> True
                    _ -> False
atl =
  satisfy' cal "AttributesList"
  where cal tok = case tok of
                    AttributesList -> True
                    _ -> False
pinst =
  satisfy' cpi "PI"
  where cpi tok = case tok of
                    PI -> True
                    _ -> False
lparen =
  satisfy' opp "OpenParen"
  where opp tok = case tok of
                    OpenParen -> True
                    _ -> False
rparen =
  satisfy' clp "CloseParen"
  where clp tok = case tok of
                    CloseParen -> True
                    _ -> False
strTok =
  (\(Str s) -> s) <$> satisfy' cs "Str"
  where cs tok = case tok of
                   Str _ -> True
                   _ -> False
nodeTok =
  (\(NodeName s) -> s) <$> satisfy' cn "NodeName"
  where cn tok = case tok of
                   NodeName _ -> True
                   _ -> False
satisfy' p attendu = do
  x <- next
  if p x
     then return x
     else case x of
            LexError s -> failBad s
            _ -> fail ("Error: " ++ attendu ++ " expected, was " ++ show x)
parens :: SXMLParser a -> SXMLParser a
parens = bracket lparen rparen
lookupS _ [] = Nothing
lookupS idx (m:ms) = case M.lookup idx m of
                       Nothing -> lookupS idx ms
                       Just uri -> Just uri
sep = TS.singleton ':'