{-# LANGUAGE ScopedTypeVariables #-} module Text.XML.Expat.Chunked ( -- * Tree structure CNode, NodeG(..), -- * Generic node manipulation module Text.XML.Expat.NodeClass, -- * Generic manipulation of the child list module Data.List.Class, -- * Parse to tree ParserOptions(..), defaultParserOptions, Encoding(..), Text.XML.Expat.Chunked.parse, HandlerT(..), XMLParseError(..), XMLParseLocation(..) ) where import Control.Monad.ListT import qualified Text.XML.Expat.IO as IO import Text.XML.Expat.NodeClass import Text.XML.Expat.SAX import Text.XML.Expat.Tree import Control.Monad import Control.Monad.Trans import qualified Data.ByteString as B import Data.IORef import Data.Iteratee hiding (head, peek) import Data.Iteratee.WrappedByteString import Data.List.Class import Data.Word ------ Types ------------------------------------------------------------------ -- | A tree representation that uses a monadic list as its child list type. type CNode m tag text = NodeG (ListT (HandlerT m)) tag text ------ Queue ------------------------------------------------------------------ -- Mutable queue implemented as a linked list data QueueCell a = Value a (QueueHead a) | End | Pending newtype QueueHead a = QueueHead (IORef (QueueCell a)) newtype QueueTail a = QueueTail (IORef (IORef (QueueCell a))) newQueue :: IO (QueueHead a, QueueTail a) newQueue = do end <- newIORef Pending endRef <- newIORef end return (QueueHead end, QueueTail endRef) peek :: QueueHead a -> IO (QueueCell a) peek (QueueHead hRef) = readIORef hRef push :: QueueTail a -> a -> IO () push (QueueTail qtRef) val = do tl <- readIORef qtRef newTl <- newIORef Pending writeIORef tl (Value val (QueueHead newTl)) writeIORef qtRef newTl pushEnd :: QueueTail a -> IO () pushEnd (QueueTail qtRef) = do tl <- readIORef qtRef writeIORef tl End ------ Handler ---------------------------------------------------------------- data Result m a = Yield (HandlerT m a) | Result a | HandlerErr String data HandlerT m a = HandlerT { runHandlerT :: m (Result m a) } instance Monad m => Functor (HandlerT m) where fmap = liftM instance Monad m => Monad (HandlerT m) where return a = HandlerT $ return $ Result a f >>= g = HandlerT $ do res1 <- runHandlerT f case res1 of Yield c -> return $ Yield (c >>= g) Result a -> runHandlerT (g a) HandlerErr err -> return $ HandlerErr err fail err = HandlerT $ return $ HandlerErr err instance MonadTrans HandlerT where lift m = HandlerT $ do r <- m return $ Result r instance MonadIO m => MonadIO (HandlerT m) where liftIO m = HandlerT $ do r <- liftIO m return $ Result r yield :: Monad m => HandlerT m () yield = HandlerT $ return $ Yield (HandlerT $ return $ Result ()) ------------------------------------------------------------------------------- -- | An iteratee that parses the input document, passing a representation of it -- to the specified handler monad. The monad runs lazily using co-routines, so -- if it requests a part of the tree that hasn't been parsed yet, it will -- be suspended, and continued when it's available. -- -- This implementation does /not/ use Haskell's lazy I/O. parse :: forall m a tag text . ( MonadIO m, GenericXMLString tag, GenericXMLString text ) => ParserOptions tag text -> (CNode m tag text -> HandlerT m a) -> IterateeG WrappedByteString Word8 m (Either ErrMsg a) parse opts code = IterateeG $ \str -> do let enc = parserEncoding opts parser <- liftIO $ IO.newParser enc (rootHd, rootTl) <- liftIO $ newQueue stackRef <- liftIO $ newIORef [rootTl] liftIO $ IO.setStartElementHandler parser $ \cName cAttrs -> do name <- textFromCString cName attrs <- forM cAttrs $ \(cAttrName,cAttrValue) -> do attrName <- textFromCString cAttrName attrValue <- textFromCString cAttrValue return (attrName, attrValue) --loc <- getParseLocation parser stack <- readIORef stackRef (hd, tl) <- newQueue push (head stack) $ Element name attrs (ListT $ iter hd) writeIORef stackRef (tl:stack) return True liftIO $ IO.setEndElementHandler parser $ \_ -> do --name <- textFromCString cName --loc <- getParseLocation parser stack <- readIORef stackRef pushEnd (head stack) writeIORef stackRef (tail stack) return True liftIO $ IO.setCharacterDataHandler parser $ \cText -> do txt <- gxFromCStringLen cText --loc <- getParseLocation parser stack <- readIORef stackRef push (head stack) $ Text txt return True let nextIter :: HandlerT m a -> IterateeG WrappedByteString Word8 m (Either ErrMsg a) nextIter c = IterateeG $ \str -> do case str of EOF (Just err) -> return $ Done (Left err) str _ -> do mErr <- case str of Chunk (WrapBS blk) -> liftIO $ IO.parseChunk parser blk False EOF _ -> liftIO $ IO.parseChunk parser B.empty True res <- runHandlerT c return $ case res of Yield c' -> do case (str, mErr) of (Chunk _, Just err) -> Cont (nextIter c') (Just $ Err $ show err) (Chunk _, Nothing) -> Cont (nextIter c') Nothing (EOF _, Just err) -> Done (Left $ Err $ show err) str (EOF _, Nothing) -> Done (Left $ Err "EOF not handled") str Result a -> do case mErr of Just err -> Done (Left $ Err $ show err) str Nothing -> Done (Right a) str HandlerErr handlerErr -> do case mErr of Just parseErr -> Done (Left $ Err $ show parseErr) str Nothing -> Done (Left $ Err $ show handlerErr) str let process = do elt <- iter rootHd case elt of Nil -> fail "no root node" Cons node _ -> code node runIter (nextIter process) str where iter :: QueueHead (CNode m tag text) -> HandlerT m (ListItem (ListT (HandlerT m)) (CNode m tag text)) iter hd = do cell <- liftIO $ peek hd case cell of Pending -> yield >> iter hd End -> return $ Nil Value a hd' -> return $ Cons a (ListT $ iter hd')