{-# LANGUAGE TypeApplications #-}

module DataFrame.IO.Parquet.Binary where

import Control.Monad
import Data.Bits
import Data.Char
import Data.IORef
import Data.Word
import Foreign
import System.IO

littleEndianWord32 :: [Word8] -> Word32
littleEndianWord32 :: [Word8] -> Word32
littleEndianWord32 [Word8]
bytes
    | [Word8] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word8]
bytes Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
4 = (Word32 -> Word32 -> Word32) -> Word32 -> [Word32] -> Word32
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
(.|.) Word32
0 ((Word8 -> Int -> Word32) -> [Word8] -> [Int] -> [Word32]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Word8
b Int
i -> (Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
i) (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
4 [Word8]
bytes) [Int
0, Int
8, Int
16, Int
24])
    | Bool
otherwise = [Word8] -> Word32
littleEndianWord32 (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
4 ([Word8] -> [Word8]) -> [Word8] -> [Word8]
forall a b. (a -> b) -> a -> b
$ [Word8]
bytes [Word8] -> [Word8] -> [Word8]
forall a. [a] -> [a] -> [a]
++ Word8 -> [Word8]
forall a. a -> [a]
repeat Word8
0)

littleEndianWord64 :: [Word8] -> Word64
littleEndianWord64 :: [Word8] -> Word64
littleEndianWord64 [Word8]
bytes = (Word64 -> Word64 -> Word64) -> Word64 -> [Word64] -> Word64
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
(.|.) Word64
0 ((Word8 -> Int -> Word64) -> [Word8] -> [Int] -> [Word64]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Word8
b Int
i -> (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b) Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
i) (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
8 [Word8]
bytes) [Int
0, Int
8 ..])

littleEndianInt32 :: [Word8] -> Int32
littleEndianInt32 :: [Word8] -> Int32
littleEndianInt32 = Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int32) -> ([Word8] -> Word32) -> [Word8] -> Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> Word32
littleEndianWord32

word64ToLittleEndian :: Word64 -> [Word8]
word64ToLittleEndian :: Word64 -> [Word8]
word64ToLittleEndian Word64
w = (Int -> Word8) -> [Int] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Word64 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
w Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftR` Int
i)) [Int
0, Int
8, Int
16, Int
24, Int
32, Int
40, Int
48, Int
56]

word32ToLittleEndian :: Word32 -> [Word8]
word32ToLittleEndian :: Word32 -> [Word8]
word32ToLittleEndian Word32
w = (Int -> Word8) -> [Int] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> Word32 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
w Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
i)) [Int
0, Int
8, Int
16, Int
24]

readUVarInt :: [Word8] -> (Word64, [Word8])
readUVarInt :: [Word8] -> (Word64, [Word8])
readUVarInt [Word8]
xs = [Word8] -> Word64 -> Int -> Integer -> (Word64, [Word8])
forall {a} {t} {t}.
(Integral a, Num t, Num t, Bits t, Bits a, Eq t) =>
[a] -> t -> Int -> t -> (t, [a])
loop [Word8]
xs Word64
0 Int
0 Integer
0
  where
    loop :: [a] -> t -> Int -> t -> (t, [a])
loop [a]
bs t
x Int
_ t
10 = (t
x, [a]
bs)
    loop (a
b : [a]
bs) t
x Int
s t
i
        | a
b a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0x80 = (t
x t -> t -> t
forall a. Bits a => a -> a -> a
.|. ((a -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
b) t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftL` Int
s), [a]
bs)
        | Bool
otherwise = [a] -> t -> Int -> t -> (t, [a])
loop [a]
bs (t
x t -> t -> t
forall a. Bits a => a -> a -> a
.|. (a -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((a
b a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x7f) a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
s))) (Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) (t
i t -> t -> t
forall a. Num a => a -> a -> a
+ t
1)

readVarIntFromBytes :: (Integral a) => [Word8] -> (a, [Word8])
readVarIntFromBytes :: forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes [Word8]
bs = (Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
n, [Word8]
rem)
  where
    (Integer
n, [Word8]
rem) = Int -> Integer -> [Word8] -> (Integer, [Word8])
forall {a}.
(Integral a, Bits a) =>
Int -> Integer -> [a] -> (Integer, [a])
loop Int
0 Integer
0 [Word8]
bs
    loop :: Int -> Integer -> [a] -> (Integer, [a])
loop Int
_ Integer
result [] = (Integer
result, [])
    loop Int
shift Integer
result (a
x : [a]
xs) =
        let res :: Integer
res = Integer
result Integer -> Integer -> Integer
forall a. Bits a => a -> a -> a
.|. ((a -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
x a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x7f) :: Integer) Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift)
         in if (a
x a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x80) a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
0x80 then (Integer
res, [a]
xs) else Int -> Integer -> [a] -> (Integer, [a])
loop (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Integer
res [a]
xs

readIntFromBytes :: (Integral a) => [Word8] -> (a, [Word8])
readIntFromBytes :: forall a. Integral a => [Word8] -> (a, [Word8])
readIntFromBytes [Word8]
bs =
    let (Int32
n, [Word8]
rem) = [Word8] -> (Int32, [Word8])
forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes [Word8]
bs
        u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
     in (Int32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> a) -> Int32 -> a
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1)), [Word8]
rem)

readInt32FromBytes :: [Word8] -> (Int32, [Word8])
readInt32FromBytes :: [Word8] -> (Int32, [Word8])
readInt32FromBytes [Word8]
bs =
    let (Int64
n', [Word8]
rem) = forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes @Int64 [Word8]
bs
        n :: Int32
n = Int64 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
n' :: Int32
        u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
     in ((Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1)), [Word8]
rem)

readAndAdvance :: IORef Int -> Ptr b -> IO Word8
readAndAdvance :: forall b. IORef Int -> Ptr b -> IO Word8
readAndAdvance IORef Int
bufferPos Ptr b
buffer = do
    Int
pos <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
    Word8
b <- Ptr b -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr b
buffer Int
pos :: IO Word8
    IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
modifyIORef IORef Int
bufferPos (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    Word8 -> IO Word8
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Word8
b

readVarIntFromBuffer :: (Integral a) => Ptr b -> IORef Int -> IO a
readVarIntFromBuffer :: forall a b. Integral a => Ptr b -> IORef Int -> IO a
readVarIntFromBuffer Ptr b
buf IORef Int
bufferPos = do
    Int
start <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
    let loop :: t -> Int -> Integer -> IO Integer
loop t
i Int
shift Integer
result = do
            Word8
b <- IORef Int -> Ptr b -> IO Word8
forall b. IORef Int -> Ptr b -> IO Word8
readAndAdvance IORef Int
bufferPos Ptr b
buf
            let res :: Integer
res = Integer
result Integer -> Integer -> Integer
forall a. Bits a => a -> a -> a
.|. ((Word8 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7f) :: Integer) Integer -> Int -> Integer
forall a. Bits a => a -> Int -> a
`shiftL` Int
shift)
            if (Word8
b Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x80) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x80
                then Integer -> IO Integer
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Integer
res
                else t -> Int -> Integer -> IO Integer
loop (t
i t -> t -> t
forall a. Num a => a -> a -> a
+ t
1) (Int
shift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Integer
res
    Integer -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> a) -> IO Integer -> IO a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Int -> Integer -> IO Integer
forall {t}. Num t => t -> Int -> Integer -> IO Integer
loop Int
start Int
0 Integer
0

readIntFromBuffer :: (Integral a) => Ptr b -> IORef Int -> IO a
readIntFromBuffer :: forall a b. Integral a => Ptr b -> IORef Int -> IO a
readIntFromBuffer Ptr b
buf IORef Int
bufferPos = do
    Int32
n <- Ptr b -> IORef Int -> IO Int32
forall a b. Integral a => Ptr b -> IORef Int -> IO a
readVarIntFromBuffer Ptr b
buf IORef Int
bufferPos
    let u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
    a -> IO a
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> IO a) -> a -> IO a
forall a b. (a -> b) -> a -> b
$ Int32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32 -> a) -> Int32 -> a
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1))

readInt32FromBuffer :: Ptr b -> IORef Int -> IO Int32
readInt32FromBuffer :: forall b. Ptr b -> IORef Int -> IO Int32
readInt32FromBuffer Ptr b
buf IORef Int
bufferPos = do
    Int32
n <- (Int64 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int32) -> IO Int64 -> IO Int32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a b. Integral a => Ptr b -> IORef Int -> IO a
readVarIntFromBuffer @Int64 Ptr b
buf IORef Int
bufferPos) :: IO Int32
    let u :: Word32
u = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
n :: Word32
    Int32 -> IO Int32
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int32 -> IO Int32) -> Int32 -> IO Int32
forall a b. (a -> b) -> a -> b
$ (Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32
u Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftR` Int
1) :: Int32) Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.^. (-(Int32
n Int32 -> Int32 -> Int32
forall a. Bits a => a -> a -> a
.&. Int32
1))

readString :: Ptr Word8 -> IORef Int -> IO String
readString :: Ptr Word8 -> IORef Int -> IO String
readString Ptr Word8
buf IORef Int
pos = do
    Int
nameSize <- forall a b. Integral a => Ptr b -> IORef Int -> IO a
readVarIntFromBuffer @Int Ptr Word8
buf IORef Int
pos
    (Word8 -> Char) -> [Word8] -> String
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Char
chr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral) ([Word8] -> String) -> IO [Word8] -> IO String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> IO Word8 -> IO [Word8]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
nameSize (IORef Int -> Ptr Word8 -> IO Word8
forall b. IORef Int -> Ptr b -> IO Word8
readAndAdvance IORef Int
pos Ptr Word8
buf)

readBytes :: Handle -> Int64 -> Int64 -> IO [Word8]
readBytes :: Handle -> Int64 -> Int64 -> IO [Word8]
readBytes Handle
handle Int64
colStart Int64
colLen = do
    Ptr Word8
buf <- Int -> IO (Ptr Word8)
forall a. Int -> IO (Ptr a)
mallocBytes (Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
colLen) :: IO (Ptr Word8)
    Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
handle SeekMode
AbsoluteSeek (Int64 -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
colStart)
    Int
_ <- Handle -> Ptr Word8 -> Int -> IO Int
forall a. Handle -> Ptr a -> Int -> IO Int
hGetBuf Handle
handle Ptr Word8
buf (Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
colLen)
    [Word8]
columnBytes <- (Int -> IO Word8) -> [Int] -> IO [Word8]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr Word8
buf) [Int
0 .. Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
colLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
    Ptr Word8 -> IO ()
forall a. Ptr a -> IO ()
free Ptr Word8
buf
    [Word8] -> IO [Word8]
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Word8]
columnBytes

numBytesInFile :: Handle -> IO Integer
numBytesInFile :: Handle -> IO Integer
numBytesInFile Handle
handle = do
    Handle -> SeekMode -> Integer -> IO ()
hSeek Handle
handle SeekMode
SeekFromEnd Integer
0
    Handle -> IO Integer
hTell Handle
handle

readByteStringFromBytes :: [Word8] -> ([Word8], [Word8])
readByteStringFromBytes :: [Word8] -> ([Word8], [Word8])
readByteStringFromBytes [Word8]
xs =
    let
        (Int
size, [Word8]
rem) = forall a. Integral a => [Word8] -> (a, [Word8])
readVarIntFromBytes @Int [Word8]
xs
     in
        (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take Int
size [Word8]
rem, Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
drop Int
size [Word8]
rem)

readByteString :: Ptr Word8 -> IORef Int -> IO [Word8]
readByteString :: Ptr Word8 -> IORef Int -> IO [Word8]
readByteString Ptr Word8
buf IORef Int
pos = do
    Int
size <- forall a b. Integral a => Ptr b -> IORef Int -> IO a
readVarIntFromBuffer @Int Ptr Word8
buf IORef Int
pos
    Int -> IO Word8 -> IO [Word8]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
size (IORef Int -> Ptr Word8 -> IO Word8
forall b. IORef Int -> Ptr b -> IO Word8
readAndAdvance IORef Int
pos Ptr Word8
buf)

readByteString' :: Ptr Word8 -> Int64 -> IO [Word8]
readByteString' :: Ptr Word8 -> Int64 -> IO [Word8]
readByteString' Ptr Word8
buf Int64
size = (Int64 -> IO Word8) -> [Int64] -> IO [Word8]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (Int64 -> Ptr Word8 -> IO Word8
forall b. Int64 -> Ptr b -> IO Word8
`readSingleByte` Ptr Word8
buf) [Int64
0 .. (Int64
size Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
- Int64
1)]

readSingleByte :: Int64 -> Ptr b -> IO Word8
readSingleByte :: forall b. Int64 -> Ptr b -> IO Word8
readSingleByte Int64
pos Ptr b
buffer = Ptr b -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr b
buffer (Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int64
pos)

readNoAdvance :: IORef Int -> Ptr b -> IO Word8
readNoAdvance :: forall b. IORef Int -> Ptr b -> IO Word8
readNoAdvance IORef Int
bufferPos Ptr b
buffer = do
    Int
pos <- IORef Int -> IO Int
forall a. IORef a -> IO a
readIORef IORef Int
bufferPos
    Ptr b -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr b
buffer Int
pos :: IO Word8