{-# LANGUAGE CPP #-}
{-# LANGUAGE Safe #-}
-- |

-- Module      : Data.Text.Encoding.Error

-- Copyright   : (c) Bryan O'Sullivan 2009

--

-- License     : BSD-style

-- Maintainer  : bos@serpentine.com

-- Portability : GHC

--

-- Types and functions for dealing with encoding and decoding errors

-- in Unicode text.

--

-- The standard functions for encoding and decoding text are strict,

-- which is to say that they throw exceptions on invalid input.  This

-- is often unhelpful on real world input, so alternative functions

-- exist that accept custom handlers for dealing with invalid inputs.

-- These 'OnError' handlers are normal Haskell functions.  You can use

-- one of the presupplied functions in this module, or you can write a

-- custom handler of your own.


module Data.Text.Encoding.Error
    (
    -- * Error handling types

      UnicodeException(..)
    , OnError
    , OnDecodeError
    , OnEncodeError
    -- * Useful error handling functions

    , lenientDecode
    , strictDecode
    , strictEncode
    , ignore
    , replace
    ) where

import Control.DeepSeq (NFData (..))
import Control.Exception (Exception, throw)
import Data.Word (Word8)
import Numeric (showHex)

-- | Function type for handling a coding error.  It is supplied with

-- two inputs:

--

-- * A 'String' that describes the error.

--

-- * The input value that caused the error.  If the error arose

--   because the end of input was reached or could not be identified

--   precisely, this value will be 'Nothing'.

--

-- If the handler returns a value wrapped with 'Just', that value will

-- be used in the output as the replacement for the invalid input.  If

-- it returns 'Nothing', no value will be used in the output.

--

-- Should the handler need to abort processing, it should use 'error'

-- or 'throw' an exception (preferably a 'UnicodeException').  It may

-- use the description provided to construct a more helpful error

-- report.

type OnError a b = String -> Maybe a -> Maybe b

-- | A handler for a decoding error.

type OnDecodeError = OnError Word8 Char

-- | A handler for an encoding error.

{-# DEPRECATED OnEncodeError "This exception is never used in practice, and will be removed." #-}
type OnEncodeError = OnError Char Word8

-- | An exception type for representing Unicode encoding errors.

data UnicodeException =
    DecodeError String (Maybe Word8)
    -- ^ Could not decode a byte sequence because it was invalid under

    -- the given encoding, or ran out of input in mid-decode.

  | EncodeError String (Maybe Char)
    -- ^ Tried to encode a character that could not be represented

    -- under the given encoding, or ran out of input in mid-encode.

    deriving (UnicodeException -> UnicodeException -> Bool
(UnicodeException -> UnicodeException -> Bool)
-> (UnicodeException -> UnicodeException -> Bool)
-> Eq UnicodeException
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UnicodeException -> UnicodeException -> Bool
== :: UnicodeException -> UnicodeException -> Bool
$c/= :: UnicodeException -> UnicodeException -> Bool
/= :: UnicodeException -> UnicodeException -> Bool
Eq)

{-# DEPRECATED EncodeError "This constructor is never used, and will be removed." #-}

showUnicodeException :: UnicodeException -> String
showUnicodeException :: UnicodeException -> String
showUnicodeException (DecodeError String
desc (Just Word8
w))
    = String
"Cannot decode byte '\\x" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Word8 -> String -> String
forall a. Integral a => a -> String -> String
showHex Word8
w (String
"': " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
desc)
showUnicodeException (DecodeError String
desc Maybe Word8
Nothing)
    = String
"Cannot decode input: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
desc
showUnicodeException (EncodeError String
desc (Just Char
c))
    = String
"Cannot encode character '\\x" String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String -> String
forall a. Integral a => a -> String -> String
showHex (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) (String
"': " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
desc)
showUnicodeException (EncodeError String
desc Maybe Char
Nothing)
    = String
"Cannot encode input: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
desc

instance Show UnicodeException where
    show :: UnicodeException -> String
show = UnicodeException -> String
showUnicodeException

instance Exception UnicodeException

instance NFData UnicodeException where
    rnf :: UnicodeException -> ()
rnf (DecodeError String
desc Maybe Word8
w) = String -> ()
forall a. NFData a => a -> ()
rnf String
desc () -> () -> ()
forall a b. a -> b -> b
`seq` Maybe Word8 -> ()
forall a. NFData a => a -> ()
rnf Maybe Word8
w () -> () -> ()
forall a b. a -> b -> b
`seq` ()
    rnf (EncodeError String
desc Maybe Char
c) = String -> ()
forall a. NFData a => a -> ()
rnf String
desc () -> () -> ()
forall a b. a -> b -> b
`seq` Maybe Char -> ()
forall a. NFData a => a -> ()
rnf Maybe Char
c () -> () -> ()
forall a b. a -> b -> b
`seq` ()

-- | Throw a 'UnicodeException' if decoding fails.

strictDecode :: OnDecodeError
strictDecode :: OnDecodeError
strictDecode String
desc Maybe Word8
c = UnicodeException -> Maybe Char
forall a e. Exception e => e -> a
throw (String -> Maybe Word8 -> UnicodeException
DecodeError String
desc Maybe Word8
c)

-- | Replace an invalid input byte with the Unicode replacement

-- character U+FFFD.

lenientDecode :: OnDecodeError
lenientDecode :: OnDecodeError
lenientDecode String
_ Maybe Word8
_ = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'\xfffd'

-- | Throw a 'UnicodeException' if encoding fails.

{-# DEPRECATED strictEncode "This function always throws an exception, and will be removed." #-}
strictEncode :: OnEncodeError
strictEncode :: OnEncodeError
strictEncode String
desc Maybe Char
c = UnicodeException -> Maybe Word8
forall a e. Exception e => e -> a
throw (String -> Maybe Char -> UnicodeException
EncodeError String
desc Maybe Char
c)

-- | Ignore an invalid input, substituting nothing in the output.

ignore :: OnError a b
ignore :: forall a b. OnError a b
ignore String
_ Maybe a
_ = Maybe b
forall a. Maybe a
Nothing

-- | Replace an invalid input with a valid output.

replace :: b -> OnError a b
replace :: forall b a. b -> OnError a b
replace b
c String
_ Maybe a
_ = b -> Maybe b
forall a. a -> Maybe a
Just b
c