{-# LANGUAGE TypeApplications #-}

module DataFrame.IO.Parquet.Binary where

import Control.Monad
import Data.Bits
import qualified Data.ByteString as BS
import Data.Char
import Data.IORef
import Data.Int
import Data.Word

littleEndianWord32 :: [Word8] -> Word32
littleEndianWord32 :: [Word8] -> Word32
littleEndianWord32 [Word8]
bytes
    | [Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
bytes Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
4 =
        (Word32 -> Word32 -> Word32) -> Word32 -> [Word32] -> Word32
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr
            Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
(.|.)
            Word32
0
            ((Word8 -> Int -> Word32) -> [Word8] -> [Int] -> [Word32]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Word8
b Int
i -> Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
i) (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
4 [Word8]
bytes) [Int
0, Int
8, Int
16, Int
24])
    | Bool
otherwise = [Word8] -> Word32
littleEndianWord32 (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
4 ([Word8] -> [Word8]) -> [Word8] -> [Word8]
forall a b. (a -> b) -> a -> b
$ [Word8]
bytes [Word8] -> [Word8] -> [Word8]
forall a. [a] -> [a] -> [a]
++ Word8 -> [Word8]
forall a. a -> [a]
repeat Word8
0)

littleEndianWord64 :: [Word8] -> Word64
littleEndianWord64 :: [Word8] -> Word64
littleEndianWord64 [Word8]
bytes =
    (Word64 -> Word64 -> Word64) -> Word64 -> [Word64] -> Word64
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr
        Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
(.|.)
        Word64
0
        ((Word8 -> Int -> Word64) -> [Word8] -> [Int] -> [Word64]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Word8
b Int
i -> Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
i) (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
8 [Word8]
bytes) [Int
0, Int
8 ..])

littleEndianInt32 :: [Word8] -> Int32
littleEndianInt32 :: [Word8] -> Int32
littleEndianInt32 = Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int32) -> ([Word8] -> Word32) -> [Word8] -> Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> Word32
littleEndianWord32

word64ToLittleEndian :: Word64 -> [Word8]
word64ToLittleEndian :: Word64 -> [Word8]
word64ToLittleEndian Word64
w = (Int -> Word8) -> [Int] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
w Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
i)) [Int
0, Int
8, Int
16, Int
24, Int
32, Int
40, Int
48, Int
56]

word32ToLittleEndian :: Word32 -> [Word8]
word32ToLittleEndian :: Word32 -> [Word8]
word32ToLittleEndian Word32
w = (Int -> Word8) -> [Int] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Word32 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
w Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
i)) [Int
0, Int
8, Int
16, Int
24]

readUVarInt :: [Word8] -> (Word64, [Word8])
readUVarInt :: [Word8] -> (Word64, [Word8])
readUVarInt [Word8]
xs = [Word8] -> Word64 -> Int -> Int -> (Word64, [Word8])
loop [Word8]
xs Word64
0 Int
0 Int
0
  where
    {-
    Each input byte contributes:
    - lower 7 payload bits
    - The high bit (0x80) is the continuation flag: 1 = more bytes follow, 0 = last byte
    Why the magic number 10: For a 64‑bit integer we need at most ceil(64 / 7) = 10 bytes
    -}
    loop :: [Word8] -> Word64 -> Int -> Int -> (Word64, [Word8])
    loop :: [Word8] -> Word64 -> Int -> Int -> (Word64, [Word8])
loop [Word8]
bs Word64
result Int
_ Int
10 = (Word64
result, [Word8]
bs)
    loop (Word8
b : [Word8]
bs) Word64
result Int
shift Int
i
        | Word8
b Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
0x80 = (Word64
result Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift), [Word8]
bs)
        | Bool
otherwise =
            let payloadBits :: Word64
payloadBits = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7f) :: Word64
             in [Word8] -> Word64 -> Int -> Int -> (Word64, [Word8])
loop [Word8]
bs (Word64
result Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
payloadBits Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift)) (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    loop [] Word64
_ Int
_ Int
_ = [Char] -> (Word64, [Word8])
forall a. HasCallStack => [Char] -> a
error [Char]
"readUVarInt: not enough input bytes"

readVarIntFromBytes :: (Integral a) => [Word8] -> (a, [Word8])
readVarIntFromBytes :: forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes [Word8]
bs = (Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
n, [Word8]
rem)
  where
    (Integer
n, [Word8]
rem) = Int -> Integer -> [Word8] -> (Integer, [Word8])
forall {a}.
(Integral a, Bits a) =>
Int -> Integer -> [a] -> (Integer, [a])
loop Int
0 Integer
0 [Word8]
bs
    loop :: Int -> Integer -> [a] -> (Integer, [a])
loop Int
_ Integer
result [] = (Integer
result, [])
    loop Int
shift Integer
result (a
x : [a]
xs) =
        let res :: Integer
res = Integer
result Integer -> Integer -> Integer
forall a. Bits a => a -> a -> a
.|. (a -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
x a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x7f) :: Integer) Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift
         in if a
x a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x80 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
0x80 then (Integer
res, [a]
xs) else Int -> Integer -> [a] -> (Integer, [a])
loop (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Integer
res [a]
xs

readIntFromBytes :: (Integral a) => [Word8] -> (a, [Word8])
readIntFromBytes :: forall a. Integral a => [Word8] -> (a, [Word8])
readIntFromBytes [Word8]
bs =
    let (Int32
n, [Word8]
rem) = [Word8] -> (Int32, [Word8])
forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes [Word8]
bs
        u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
     in (Int32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> a) -> Int32 -> a
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1)), [Word8]
rem)

readInt32FromBytes :: [Word8] -> (Int32, [Word8])
readInt32FromBytes :: [Word8] -> (Int32, [Word8])
readInt32FromBytes [Word8]
bs =
    let (Int64
n', [Word8]
rem) = forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes @Int64 [Word8]
bs
        n :: Int32
n = Int64 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
n' :: Int32
        u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
     in ((Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1)), [Word8]
rem)

readAndAdvance :: IORef Int -> BS.ByteString -> IO Word8
readAndAdvance :: IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
bufferPos ByteString
buffer = do
    Int
pos <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
    let b :: Word8
b = HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
buffer Int
pos
    IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef IORef Int
bufferPos (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    Word8 -> IO Word8
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Word8
b

readVarIntFromBuffer :: (Integral a) => BS.ByteString -> IORef Int -> IO a
readVarIntFromBuffer :: forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer ByteString
buf IORef Int
bufferPos = do
    Int
start <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
    let loop :: t -> Int -> Integer -> IO Integer
loop t
i Int
shift Integer
result = do
            Word8
b <- IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
bufferPos ByteString
buf
            let res :: Integer
res = Integer
result Integer -> Integer -> Integer
forall a. Bits a => a -> a -> a
.|. (Word8 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7f) :: Integer) Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift
            if Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x80 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x80
                then Integer -> IO Integer
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Integer
res
                else t -> Int -> Integer -> IO Integer
loop (t
i t -> t -> t
forall a. Num a => a -> a -> a
+ t
1) (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Integer
res
    Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> a) -> IO Integer -> IO a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Int -> Integer -> IO Integer
forall {t}. Num t => t -> Int -> Integer -> IO Integer
loop Int
start Int
0 Integer
0

readIntFromBuffer :: (Integral a) => BS.ByteString -> IORef Int -> IO a
readIntFromBuffer :: forall a. Integral a => ByteString -> IORef Int -> IO a
readIntFromBuffer ByteString
buf IORef Int
bufferPos = do
    Int32
n <- ByteString -> IORef Int -> IO Int32
forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer ByteString
buf IORef Int
bufferPos
    let u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
    a -> IO a
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> IO a) -> a -> IO a
forall a b. (a -> b) -> a -> b
$ Int32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> a) -> Int32 -> a
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1))

readInt32FromBuffer :: BS.ByteString -> IORef Int -> IO Int32
readInt32FromBuffer :: ByteString -> IORef Int -> IO Int32
readInt32FromBuffer ByteString
buf IORef Int
bufferPos = do
    Int32
n <- (Int64 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int32) -> IO Int64 -> IO Int32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer @Int64 ByteString
buf IORef Int
bufferPos) :: IO Int32
    let u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
    Int32 -> IO Int32
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int32 -> IO Int32) -> Int32 -> IO Int32
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1))

readString :: BS.ByteString -> IORef Int -> IO String
readString :: ByteString -> IORef Int -> IO [Char]
readString ByteString
buf IORef Int
pos = do
    Int
nameSize <- forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer @Int ByteString
buf IORef Int
pos
    (Word8 -> Char) -> [Word8] -> [Char]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Char
chr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral) ([Word8] -> [Char]) -> IO [Word8] -> IO [Char]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO Word8 -> IO [Word8]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
nameSize (IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
pos ByteString
buf)

readByteStringFromBytes :: [Word8] -> ([Word8], [Word8])
readByteStringFromBytes :: [Word8] -> ([Word8], [Word8])
readByteStringFromBytes [Word8]
xs =
    let
        (Int
size, [Word8]
rem) = forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes @Int [Word8]
xs
     in
        Int -> [Word8] -> ([Word8], [Word8])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
size [Word8]
rem

readByteString :: BS.ByteString -> IORef Int -> IO [Word8]
readByteString :: ByteString -> IORef Int -> IO [Word8]
readByteString ByteString
buf IORef Int
pos = do
    Int
size <- forall a. Integral a => ByteString -> IORef Int -> IO a
readVarIntFromBuffer @Int ByteString
buf IORef Int
pos
    Int -> IO Word8 -> IO [Word8]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
size (IORef Int -> ByteString -> IO Word8
readAndAdvance IORef Int
pos ByteString
buf)

readByteString' :: BS.ByteString -> Int64 -> IO [Word8]
readByteString' :: ByteString -> Int64 -> IO [Word8]
readByteString' ByteString
buf Int64
size = (Int64 -> IO Word8) -> [Int64] -> IO [Word8]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (Int64 -> ByteString -> IO Word8
`readSingleByte` ByteString
buf) [Int64
0 .. (Int64
size Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
- Int64
1)]

readSingleByte :: Int64 -> BS.ByteString -> IO Word8
readSingleByte :: Int64 -> ByteString -> IO Word8
readSingleByte Int64
pos ByteString
buffer = Word8 -> IO Word8
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8 -> IO Word8) -> Word8 -> IO Word8
forall a b. (a -> b) -> a -> b
$ HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
buffer (Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
pos)

readNoAdvance :: IORef Int -> BS.ByteString -> IO Word8
readNoAdvance :: IORef Int -> ByteString -> IO Word8
readNoAdvance IORef Int
bufferPos ByteString
buffer = do
    Int
pos <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
    Word8 -> IO Word8
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8 -> IO Word8) -> Word8 -> IO Word8
forall a b. (a -> b) -> a -> b
$ HasCallStack => ByteString -> Int -> Word8
ByteString -> Int -> Word8
BS.index ByteString
buffer Int
pos