{-# LANGUAGE TypeApplications #-}
module DataFrame.IO.Parquet.Binary where
import Control.Exception (bracketOnError)
import Control.Monad
import Data.Bits
import qualified Data.ByteString as BS
import qualified Data.ByteString.Unsafe as BSU
import Data.Char
import Data.IORef
import Data.Int
import Data.Word
import qualified Foreign.Marshal.Alloc as Foreign
import qualified Foreign.Ptr as Foreign
import qualified Foreign.Storable as Foreign
littleEndianWord32 :: BS.ByteString -> Word32
littleEndianWord32 :: ByteString -> Word32
littleEndianWord32 ByteString
bytes
| ByteString -> Int
BS.length ByteString
bytes Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
4 =
(Word32 -> Word32 -> Word32) -> Word32 -> [Word32] -> Word32
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr
Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
(.|.)
Word32
0
( (Word8 -> Int -> Word32) -> [Word8] -> [Int] -> [Word32]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith
(\Word8
b Int
i -> Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
i)
(ByteString -> [Word8]
BS.unpack (ByteString -> [Word8]) -> ByteString -> [Word8]
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.take Int
4 ByteString
bytes)
[Int
0, Int
8, Int
16, Int
24]
)
| Bool
otherwise =
ByteString -> Word32
littleEndianWord32 (Int -> ByteString -> ByteString
BS.take Int
4 (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
bytes ByteString -> ByteString -> ByteString
`BS.append` [Word8] -> ByteString
BS.pack [Word8
0, Word8
0, Word8
0, Word8
0])
littleEndianWord64 :: BS.ByteString -> Word64
littleEndianWord64 :: ByteString -> Word64
littleEndianWord64 ByteString
bytes =
(Word64 -> Word64 -> Word64) -> Word64 -> [Word64] -> Word64
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr
Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
(.|.)
Word64
0
( (Word8 -> Int -> Word64) -> [Word8] -> [Int] -> [Word64]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith
(\Word8
b Int
i -> Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
i)
(ByteString -> [Word8]
BS.unpack (ByteString -> [Word8]) -> ByteString -> [Word8]
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.take Int
8 ByteString
bytes)
[Int
0, Int
8 ..]
)
littleEndianInt32 :: BS.ByteString -> Int32
littleEndianInt32 :: ByteString -> Int32
littleEndianInt32 = Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int32) -> (ByteString -> Word32) -> ByteString -> Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Word32
littleEndianWord32
word64ToLittleEndian :: Word64 -> BS.ByteString
word64ToLittleEndian :: Word64 -> ByteString
word64ToLittleEndian Word64
w =
(Word8 -> Word8) -> ByteString -> ByteString
BS.map
(\Word8
i -> Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
w Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
i))
([Word8] -> ByteString
BS.pack [Word8
0, Word8
8, Word8
16, Word8
24, Word8
32, Word8
40, Word8
48, Word8
56])
word32ToLittleEndian :: Word32 -> BS.ByteString
word32ToLittleEndian :: Word32 -> ByteString
word32ToLittleEndian Word32
w =
(Word8 -> Word8) -> ByteString -> ByteString
BS.map (\Word8
i -> Word32 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
w Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
i)) ([Word8] -> ByteString
BS.pack [Word8
0, Word8
8, Word8
16, Word8
24])
readUVarInt :: BS.ByteString -> (Word64, BS.ByteString)
readUVarInt :: ByteString -> (Word64, ByteString)
readUVarInt ByteString
xs = ByteString -> Word64 -> Int -> Int -> (Word64, ByteString)
loop ByteString
xs Word64
0 Int
0 Int
0
where
loop :: BS.ByteString -> Word64 -> Int -> Int -> (Word64, BS.ByteString)
loop :: ByteString -> Word64 -> Int -> Int -> (Word64, ByteString)
loop ByteString
bs Word64
result Int
_ Int
10 = (Word64
result, ByteString
bs)
loop ByteString
xs Word64
result Int
shift Int
i = case ByteString -> Maybe (Word8, ByteString)
BS.uncons ByteString
xs of
Maybe (Word8, ByteString)
Nothing -> [Char] -> (Word64, ByteString)
forall a. HasCallStack => [Char] -> a
error [Char]
"readUVarInt: not enough input bytes"
Just (Word8
b, ByteString
bs) ->
if Word8
b Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
0x80
then (Word64
result Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift), ByteString
bs)
else
let payloadBits :: Word64
payloadBits = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7f) :: Word64
in ByteString -> Word64 -> Int -> Int -> (Word64, ByteString)
loop ByteString
bs (Word64
result Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
payloadBits Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift)) (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
readVarIntFromBytes :: (Integral a) => BS.ByteString -> (a, BS.ByteString)
readVarIntFromBytes :: forall a. Integral a => ByteString -> (a, ByteString)
readVarIntFromBytes ByteString
bs = (Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
n, ByteString
rem)
where
(Integer
n, ByteString
rem) = Int -> Integer -> ByteString -> (Integer, ByteString)
loop Int
0 Integer
0 ByteString
bs
loop :: Int -> Integer -> ByteString -> (Integer, ByteString)
loop Int
shift Integer
result ByteString
bs = case ByteString -> Maybe (Word8, ByteString)
BS.uncons ByteString
bs of
Maybe (Word8, ByteString)
Nothing -> (Integer
result, ByteString
BS.empty)
Just (Word8
x, ByteString
xs) ->
let res :: Integer
res = Integer
result Integer -> Integer -> Integer
forall a. Bits a => a -> a -> a
.|. (Word8 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
x Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7f) :: Integer) Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift
in if Word8
x Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x80 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x80 then (Integer
res, ByteString
xs) else Int -> Integer -> ByteString -> (Integer, ByteString)
loop (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Integer
res ByteString
xs
readIntFromBytes :: (Integral a) => BS.ByteString -> (a, BS.ByteString)
readIntFromBytes :: forall a. Integral a => ByteString -> (a, ByteString)
readIntFromBytes ByteString
bs =
let (Int32
n, ByteString
rem) = ByteString -> (Int32, ByteString)
forall a. Integral a => ByteString -> (a, ByteString)
readVarIntFromBytes ByteString
bs
u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
in (Int32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> a) -> Int32 -> a
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1)), ByteString
rem)
readInt32FromBytes :: BS.ByteString -> (Int32, BS.ByteString)
readInt32FromBytes :: ByteString -> (Int32, ByteString)
readInt32FromBytes ByteString
bs =
let (Int64
n', ByteString
rem) = forall a. Integral a => ByteString -> (a, ByteString)
readVarIntFromBytes @Int64 ByteString
bs
n :: Int32
n = Int64 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
n' :: Int32
u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
in ((Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1)), ByteString
rem)
readAndAdvance :: IORef Int -> BS.ByteString -> IO Word8
readAndAdvance :: IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
bufferPos ByteString
buffer = do
Int
pos <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
let b :: Word8
b = HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
buffer Int
pos
IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef IORef Int
bufferPos (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Word8 -> IO Word8
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Word8
b
readVarIntFromBuffer :: (Integral a) => BS.ByteString -> IORef Int -> IO a
readVarIntFromBuffer :: forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer ByteString
buf IORef Int
bufferPos = do
Int
start <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
let loop :: t -> Int -> Integer -> IO Integer
loop t
i Int
shift Integer
result = do
Word8
b <- IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
bufferPos ByteString
buf
let res :: Integer
res = Integer
result Integer -> Integer -> Integer
forall a. Bits a => a -> a -> a
.|. (Word8 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7f) :: Integer) Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift
if Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x80 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x80
then Integer -> IO Integer
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Integer
res
else t -> Int -> Integer -> IO Integer
loop (t
i t -> t -> t
forall a. Num a => a -> a -> a
+ t
1) (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Integer
res
Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> a) -> IO Integer -> IO a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Int -> Integer -> IO Integer
forall {t}. Num t => t -> Int -> Integer -> IO Integer
loop Int
start Int
0 Integer
0
readIntFromBuffer :: (Integral a) => BS.ByteString -> IORef Int -> IO a
readIntFromBuffer :: forall a. Integral a => ByteString -> IORef Int -> IO a
readIntFromBuffer ByteString
buf IORef Int
bufferPos = do
Int32
n <- ByteString -> IORef Int -> IO Int32
forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer ByteString
buf IORef Int
bufferPos
let u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
a -> IO a
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> IO a) -> a -> IO a
forall a b. (a -> b) -> a -> b
$ Int32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> a) -> Int32 -> a
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1))
readInt32FromBuffer :: BS.ByteString -> IORef Int -> IO Int32
readInt32FromBuffer :: ByteString -> IORef Int -> IO Int32
readInt32FromBuffer ByteString
buf IORef Int
bufferPos = do
Int32
n <- (Int64 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int32) -> IO Int64 -> IO Int32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer @Int64 ByteString
buf IORef Int
bufferPos) :: IO Int32
let u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
Int32 -> IO Int32
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int32 -> IO Int32) -> Int32 -> IO Int32
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1))
readString :: BS.ByteString -> IORef Int -> IO String
readString :: ByteString -> IORef Int -> IO [Char]
readString ByteString
buf IORef Int
pos = do
Int
nameSize <- forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer @Int ByteString
buf IORef Int
pos
Int -> IO Char -> IO [Char]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
nameSize (Int -> Char
chr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Char) -> IO Word8 -> IO Char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
pos ByteString
buf)
readByteStringFromBytes :: BS.ByteString -> (BS.ByteString, BS.ByteString)
readByteStringFromBytes :: ByteString -> (ByteString, ByteString)
readByteStringFromBytes ByteString
xs =
let
(Int
size, ByteString
rem) = forall a. Integral a => ByteString -> (a, ByteString)
readVarIntFromBytes @Int ByteString
xs
in
Int -> ByteString -> (ByteString, ByteString)
BS.splitAt Int
size ByteString
rem
readByteString :: BS.ByteString -> IORef Int -> IO BS.ByteString
readByteString :: ByteString -> IORef Int -> IO ByteString
readByteString ByteString
buf IORef Int
pos = do
Int
size <- forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer @Int ByteString
buf IORef Int
pos
Int -> (Int -> IO Word8) -> IO ByteString
fillByteStringByWord8 Int
size (\Int
_ -> IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
pos ByteString
buf)
readByteString' :: BS.ByteString -> Int64 -> IO BS.ByteString
readByteString' :: ByteString -> Int64 -> IO ByteString
readByteString' ByteString
buf Int64
size =
Int -> (Int -> IO Word8) -> IO ByteString
fillByteStringByWord8
(Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
size)
((Int64 -> ByteString -> IO Word8
`readSingleByte` ByteString
buf) (Int64 -> IO Word8) -> (Int -> Int64) -> Int -> IO Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral)
fillByteStringByWord8 :: Int -> (Int -> IO Word8) -> IO BS.ByteString
fillByteStringByWord8 :: Int -> (Int -> IO Word8) -> IO ByteString
fillByteStringByWord8 Int
size Int -> IO Word8
getByte = do
IO (Ptr Word8)
-> (Ptr Word8 -> IO ())
-> (Ptr Word8 -> IO ByteString)
-> IO ByteString
forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracketOnError
(Int -> IO (Ptr Word8)
forall a. Int -> IO (Ptr a)
Foreign.mallocBytes Int
size :: IO (Foreign.Ptr Word8))
Ptr Word8 -> IO ()
forall a. Ptr a -> IO ()
Foreign.free
( \Ptr Word8
p -> do
Int -> Ptr Word8 -> IO ()
forall {b}. Int -> Ptr b -> IO ()
fill Int
0 Ptr Word8
p
Ptr Word8 -> Int -> IO () -> IO ByteString
BSU.unsafePackCStringFinalizer Ptr Word8
p Int
size (Ptr Word8 -> IO ()
forall a. Ptr a -> IO ()
Foreign.free Ptr Word8
p)
)
where
fill :: Int -> Ptr b -> IO ()
fill Int
i Ptr b
p
| Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
size = () -> IO ()
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
| Bool
otherwise = Int -> IO Word8
getByte Int
i IO Word8 -> (Word8 -> IO ()) -> IO ()
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Ptr b -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
Foreign.pokeByteOff Ptr b
p Int
i IO () -> IO () -> IO ()
forall a b. IO a -> IO b -> IO b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Ptr b -> IO ()
fill (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Ptr b
p
{-# INLINE fillByteStringByWord8 #-}
readSingleByte :: Int64 -> BS.ByteString -> IO Word8
readSingleByte :: Int64 -> ByteString -> IO Word8
readSingleByte Int64
pos ByteString
buffer = Word8 -> IO Word8
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8 -> IO Word8) -> Word8 -> IO Word8
forall a b. (a -> b) -> a -> b
$ HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
buffer (Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
pos)
readNoAdvance :: IORef Int -> BS.ByteString -> IO Word8
readNoAdvance :: IORef Int -> ByteString -> IO Word8
readNoAdvance IORef Int
bufferPos ByteString
buffer = do
Int
pos <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
Word8 -> IO Word8
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8 -> IO Word8) -> Word8 -> IO Word8
forall a b. (a -> b) -> a -> b
$ HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
buffer Int
pos