{- | Module : Data.ASN1.Types.String License : BSD-style Copyright : (c) 2010-2013 Vincent Hanquez Stability : experimental Portability : unknown Different String types available in ASN.1. -} module Data.ASN1.Types.String ( ASN1StringEncoding (..) , ASN1CharacterString (..) , asn1CharacterString , asn1CharacterToString ) where import Data.Bits ( (.&.), (.|.), shiftL, shiftR, testBit ) import Data.ByteString ( ByteString ) import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as BC import Data.String ( IsString (..) ) import Data.Word ( Word16, Word32, Word8 ) -- A note on T.61 encodings. The actual specification of a T.61 character set -- seems to be lost in time. As such, it will be considered an ASCII-like -- encoding. -- -- -- "sizable volume of software in the world treats TeletexString (T61String) -- as a simple 8-bit string with mostly Windows Latin 1" -- | Define all possible ASN.1 String encodings. data ASN1StringEncoding = IA5 -- ^ 128 characters equivalent to the ASCII alphabet. | UTF8 -- ^ UTF8. | General -- ^ All registered graphic and character sets (see ISO 2375) plus SPACE and -- DELETE. | Graphic -- ^ All registered G sets and SPACE. | Numeric -- ^ Encoding containing numeric [0-9] and space. | Printable -- ^ Printable [a-z] [A-Z] [()+,-.?:/=] and space. | VideoTex -- ^ CCITT's T.100 and T.101 character sets. | Visible -- ^ International ASCII printing character sets. | T61 -- ^ Teletext. | UTF32 -- ^ UTF32. | Character -- ^ Character. | BMP -- ^ UCS2. deriving (Eq, Ord, Show) -- | Provide a way to possibly encode or decode character strings based on -- character encoding. stringEncodingFunctions :: ASN1StringEncoding -> Maybe (ByteString -> String, String -> ByteString) stringEncodingFunctions encoding | encoding == UTF8 = Just (decodeUTF8, encodeUTF8) | encoding == BMP = Just (decodeBMP, encodeBMP) | encoding == UTF32 = Just (decodeUTF32, encodeUTF32) | encoding `elem` asciiLikeEncodings = Just (decodeASCII, encodeASCII) | otherwise = Nothing where asciiLikeEncodings = [IA5,Numeric,Printable,Visible,General,Graphic,T61] -- | Encode a string into a character string. asn1CharacterString :: ASN1StringEncoding -> String -> ASN1CharacterString asn1CharacterString encoding s = case stringEncodingFunctions encoding of Just (_, e) -> ASN1CharacterString encoding (e s) Nothing -> error ("cannot encode ASN1 Character String " ++ show encoding ++ " from string") -- | Try to decode an 'ASN1CharacterString' to a String. asn1CharacterToString :: ASN1CharacterString -> Maybe String asn1CharacterToString (ASN1CharacterString encoding bs) = case stringEncodingFunctions encoding of Just (d, _) -> Just (d bs) Nothing -> Nothing -- | ASN1 Character String with encoding data ASN1CharacterString = ASN1CharacterString { characterEncoding :: ASN1StringEncoding , getCharacterStringRawData :: ByteString } deriving (Eq, Ord, Show) instance IsString ASN1CharacterString where fromString s = ASN1CharacterString UTF8 (encodeUTF8 s) decodeUTF8 :: ByteString -> String decodeUTF8 b = loop 0 $ B.unpack b where loop :: Int -> [Word8] -> [Char] loop _ [] = [] loop pos (x:xs) | x `isClear` 7 = toEnum (fromIntegral x) : loop (pos+1) xs | x `isClear` 6 = error "continuation byte in heading context" | x `isClear` 5 = uncont 1 (x .&. 0x1f) pos xs | x `isClear` 4 = uncont 2 (x .&. 0xf) pos xs | x `isClear` 3 = uncont 3 (x .&. 0x7) pos xs | otherwise = error "too many byte" uncont :: Int -> Word8 -> Int -> [Word8] -> [Char] uncont 1 iniV pos xs = case xs of c1:xs' -> decodeCont iniV [c1] : loop (pos+2) xs' _ -> error "truncated continuation, expecting 1 byte" uncont 2 iniV pos xs = case xs of c1:c2:xs' -> decodeCont iniV [c1,c2] : loop (pos+3) xs' _ -> error "truncated continuation, expecting 2 bytes" uncont 3 iniV pos xs = case xs of c1:c2:c3:xs' -> decodeCont iniV [c1,c2,c3] : loop (pos+4) xs' _ -> error "truncated continuation, expecting 3 bytes" uncont _ _ _ _ = error "invalid number of bytes for continuation" decodeCont :: Word8 -> [Word8] -> Char decodeCont iniV l | all isContByte l = toEnum $ foldl (\acc v -> (acc `shiftL` 6) + fromIntegral v) (fromIntegral iniV) (map (.&. 0x3f) l) | otherwise = error "continuation bytes invalid" isContByte v = v `testBit` 7 && v `isClear` 6 isClear v i = not (v `testBit` i) encodeUTF8 :: String -> ByteString encodeUTF8 s = B.pack $ concatMap (toUTF8 . fromEnum) s where toUTF8 e | e < 0x80 = [fromIntegral e] | e < 0x800 = [fromIntegral (0xc0 .|. (e `shiftR` 6)), toCont e] | e < 0x10000 = [ fromIntegral (0xe0 .|. (e `shiftR` 12)) , toCont (e `shiftR` 6) , toCont e ] | e < 0x200000 = [fromIntegral (0xf0 .|. (e `shiftR` 18)) , toCont (e `shiftR` 12) , toCont (e `shiftR` 6) , toCont e ] | otherwise = error "not a valid value" toCont v = fromIntegral (0x80 .|. (v .&. 0x3f)) decodeASCII :: ByteString -> String decodeASCII = BC.unpack encodeASCII :: String -> ByteString encodeASCII = BC.pack decodeBMP :: ByteString -> String decodeBMP b | odd (B.length b) = error "not a valid BMP string" | otherwise = fromUCS2 $ B.unpack b where fromUCS2 [] = [] fromUCS2 (b0:b1:l) = let v :: Word16 v = (fromIntegral b0 `shiftL` 8) .|. fromIntegral b1 in toEnum (fromIntegral v) : fromUCS2 l fromUCS2 _ = error "decodeBMP: internal error" encodeBMP :: String -> ByteString encodeBMP s = B.pack $ concatMap (toUCS2 . fromEnum) s where toUCS2 v = [b0, b1] where b0 = fromIntegral (v `shiftR` 8) b1 = fromIntegral (v .&. 0xff) decodeUTF32 :: ByteString -> String decodeUTF32 bs | (B.length bs `mod` 4) /= 0 = error "not a valid UTF32 string" | otherwise = fromUTF32 0 where w32ToChar :: Word32 -> Char w32ToChar = toEnum . fromIntegral fromUTF32 ofs | ofs == B.length bs = [] | otherwise = let a = B.index bs ofs b = B.index bs (ofs+1) c = B.index bs (ofs+2) d = B.index bs (ofs+3) v = (fromIntegral a `shiftL` 24) .|. (fromIntegral b `shiftL` 16) .|. (fromIntegral c `shiftL` 8) .|. fromIntegral d in w32ToChar v : fromUTF32 (ofs+4) encodeUTF32 :: String -> ByteString encodeUTF32 s = B.pack $ concatMap (toUTF32 . fromEnum) s where toUTF32 v = [b0, b1, b2, b3] where b0 = fromIntegral (v `shiftR` 24) b1 = fromIntegral ((v `shiftR` 16) .&. 0xff) b2 = fromIntegral ((v `shiftR` 8) .&. 0xff) b3 = fromIntegral (v .&. 0xff)