{-# LANGUAGE TypeApplications #-} module DataFrame.IO.Parquet.Binary where import Control.Monad import Data.Bits import Data.Char import Data.IORef import Data.Word import Foreign import System.IO littleEndianWord32 :: [Word8] -> Word32 littleEndianWord32 :: [Word8] -> Word32 littleEndianWord32 [Word8] bytes | [Word8] -> Int forall a. [a] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int length [Word8] bytes Int -> Int -> Bool forall a. Ord a => a -> a -> Bool >= Int 4 = (Word32 -> Word32 -> Word32) -> Word32 -> [Word32] -> Word32 forall a b. (a -> b -> b) -> b -> [a] -> b forall (t :: * -> *) a b. Foldable t => (a -> b -> b) -> b -> t a -> b foldr Word32 -> Word32 -> Word32 forall a. Bits a => a -> a -> a (.|.) Word32 0 ((Word8 -> Int -> Word32) -> [Word8] -> [Int] -> [Word32] forall a b c. (a -> b -> c) -> [a] -> [b] -> [c] zipWith (\Word8 b Int i -> (Word8 -> Word32 forall a b. (Integral a, Num b) => a -> b fromIntegral Word8 b) Word32 -> Int -> Word32 forall a. Bits a => a -> Int -> a `shiftL` Int i) (Int -> [Word8] -> [Word8] forall a. Int -> [a] -> [a] take Int 4 [Word8] bytes) [Int 0, Int 8, Int 16, Int 24]) | Bool otherwise = [Word8] -> Word32 littleEndianWord32 (Int -> [Word8] -> [Word8] forall a. Int -> [a] -> [a] take Int 4 ([Word8] -> [Word8]) -> [Word8] -> [Word8] forall a b. (a -> b) -> a -> b $ [Word8] bytes [Word8] -> [Word8] -> [Word8] forall a. [a] -> [a] -> [a] ++ Word8 -> [Word8] forall a. a -> [a] repeat Word8 0) littleEndianWord64 :: [Word8] -> Word64 littleEndianWord64 :: [Word8] -> Word64 littleEndianWord64 [Word8] bytes = (Word64 -> Word64 -> Word64) -> Word64 -> [Word64] -> Word64 forall a b. (a -> b -> b) -> b -> [a] -> b forall (t :: * -> *) a b. Foldable t => (a -> b -> b) -> b -> t a -> b foldr Word64 -> Word64 -> Word64 forall a. Bits a => a -> a -> a (.|.) Word64 0 ((Word8 -> Int -> Word64) -> [Word8] -> [Int] -> [Word64] forall a b c. (a -> b -> c) -> [a] -> [b] -> [c] zipWith (\Word8 b Int i -> (Word8 -> Word64 forall a b. (Integral a, Num b) => a -> b fromIntegral Word8 b) Word64 -> Int -> Word64 forall a. Bits a => a -> Int -> a `shiftL` Int i) (Int -> [Word8] -> [Word8] forall a. Int -> [a] -> [a] take Int 8 [Word8] bytes) [Int 0, Int 8 ..]) littleEndianInt32 :: [Word8] -> Int32 littleEndianInt32 :: [Word8] -> Int32 littleEndianInt32 = Word32 -> Int32 forall a b. (Integral a, Num b) => a -> b fromIntegral (Word32 -> Int32) -> ([Word8] -> Word32) -> [Word8] -> Int32 forall b c a. (b -> c) -> (a -> b) -> a -> c . [Word8] -> Word32 littleEndianWord32 word64ToLittleEndian :: Word64 -> [Word8] word64ToLittleEndian :: Word64 -> [Word8] word64ToLittleEndian Word64 w = (Int -> Word8) -> [Int] -> [Word8] forall a b. (a -> b) -> [a] -> [b] map (\Int i -> Word64 -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Word64 w Word64 -> Int -> Word64 forall a. Bits a => a -> Int -> a `shiftR` Int i)) [Int 0, Int 8, Int 16, Int 24, Int 32, Int 40, Int 48, Int 56] word32ToLittleEndian :: Word32 -> [Word8] word32ToLittleEndian :: Word32 -> [Word8] word32ToLittleEndian Word32 w = (Int -> Word8) -> [Int] -> [Word8] forall a b. (a -> b) -> [a] -> [b] map (\Int i -> Word32 -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Word32 w Word32 -> Int -> Word32 forall a. Bits a => a -> Int -> a `shiftR` Int i)) [Int 0, Int 8, Int 16, Int 24] readUVarInt :: [Word8] -> (Word64, [Word8]) readUVarInt :: [Word8] -> (Word64, [Word8]) readUVarInt [Word8] xs = [Word8] -> Word64 -> Int -> Integer -> (Word64, [Word8]) forall {a} {t} {t}. (Integral a, Num t, Num t, Bits t, Bits a, Eq t) => [a] -> t -> Int -> t -> (t, [a]) loop [Word8] xs Word64 0 Int 0 Integer 0 where loop :: [a] -> t -> Int -> t -> (t, [a]) loop [a] bs t x Int _ t 10 = (t x, [a] bs) loop (a b : [a] bs) t x Int s t i | a b a -> a -> Bool forall a. Ord a => a -> a -> Bool < a 0x80 = (t x t -> t -> t forall a. Bits a => a -> a -> a .|. ((a -> t forall a b. (Integral a, Num b) => a -> b fromIntegral a b) t -> Int -> t forall a. Bits a => a -> Int -> a `shiftL` Int s), [a] bs) | Bool otherwise = [a] -> t -> Int -> t -> (t, [a]) loop [a] bs (t x t -> t -> t forall a. Bits a => a -> a -> a .|. (a -> t forall a b. (Integral a, Num b) => a -> b fromIntegral ((a b a -> a -> a forall a. Bits a => a -> a -> a .&. a 0x7f) a -> Int -> a forall a. Bits a => a -> Int -> a `shiftL` Int s))) (Int s Int -> Int -> Int forall a. Num a => a -> a -> a + Int 7) (t i t -> t -> t forall a. Num a => a -> a -> a + t 1) readVarIntFromBytes :: (Integral a) => [Word8] -> (a, [Word8]) readVarIntFromBytes :: forall a. Integral a => [Word8] -> (a, [Word8]) readVarIntFromBytes [Word8] bs = (Integer -> a forall a b. (Integral a, Num b) => a -> b fromIntegral Integer n, [Word8] rem) where (Integer n, [Word8] rem) = Int -> Integer -> [Word8] -> (Integer, [Word8]) forall {a}. (Integral a, Bits a) => Int -> Integer -> [a] -> (Integer, [a]) loop Int 0 Integer 0 [Word8] bs loop :: Int -> Integer -> [a] -> (Integer, [a]) loop Int _ Integer result [] = (Integer result, []) loop Int shift Integer result (a x : [a] xs) = let res :: Integer res = Integer result Integer -> Integer -> Integer forall a. Bits a => a -> a -> a .|. ((a -> Integer forall a b. (Integral a, Num b) => a -> b fromIntegral (a x a -> a -> a forall a. Bits a => a -> a -> a .&. a 0x7f) :: Integer) Integer -> Int -> Integer forall a. Bits a => a -> Int -> a `shiftL` Int shift) in if (a x a -> a -> a forall a. Bits a => a -> a -> a .&. a 0x80) a -> a -> Bool forall a. Eq a => a -> a -> Bool /= a 0x80 then (Integer res, [a] xs) else Int -> Integer -> [a] -> (Integer, [a]) loop (Int shift Int -> Int -> Int forall a. Num a => a -> a -> a + Int 7) Integer res [a] xs readIntFromBytes :: (Integral a) => [Word8] -> (a, [Word8]) readIntFromBytes :: forall a. Integral a => [Word8] -> (a, [Word8]) readIntFromBytes [Word8] bs = let (Int32 n, [Word8] rem) = [Word8] -> (Int32, [Word8]) forall a. Integral a => [Word8] -> (a, [Word8]) readVarIntFromBytes [Word8] bs u :: Word32 u = Int32 -> Word32 forall a b. (Integral a, Num b) => a -> b fromIntegral Int32 n :: Word32 in (Int32 -> a forall a b. (Integral a, Num b) => a -> b fromIntegral (Int32 -> a) -> Int32 -> a forall a b. (a -> b) -> a -> b $ (Word32 -> Int32 forall a b. (Integral a, Num b) => a -> b fromIntegral (Word32 u Word32 -> Int -> Word32 forall a. Bits a => a -> Int -> a `shiftR` Int 1) :: Int32) Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .^. (-(Int32 n Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .&. Int32 1)), [Word8] rem) readInt32FromBytes :: [Word8] -> (Int32, [Word8]) readInt32FromBytes :: [Word8] -> (Int32, [Word8]) readInt32FromBytes [Word8] bs = let (Int64 n', [Word8] rem) = forall a. Integral a => [Word8] -> (a, [Word8]) readVarIntFromBytes @Int64 [Word8] bs n :: Int32 n = Int64 -> Int32 forall a b. (Integral a, Num b) => a -> b fromIntegral Int64 n' :: Int32 u :: Word32 u = Int32 -> Word32 forall a b. (Integral a, Num b) => a -> b fromIntegral Int32 n :: Word32 in ((Word32 -> Int32 forall a b. (Integral a, Num b) => a -> b fromIntegral (Word32 u Word32 -> Int -> Word32 forall a. Bits a => a -> Int -> a `shiftR` Int 1) :: Int32) Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .^. (-(Int32 n Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .&. Int32 1)), [Word8] rem) readAndAdvance :: IORef Int -> Ptr b -> IO Word8 readAndAdvance :: forall b. IORef Int -> Ptr b -> IO Word8 readAndAdvance IORef Int bufferPos Ptr b buffer = do Int pos <- IORef Int -> IO Int forall a. IORef a -> IO a readIORef IORef Int bufferPos Word8 b <- Ptr b -> Int -> IO Word8 forall b. Ptr b -> Int -> IO Word8 forall a b. Storable a => Ptr b -> Int -> IO a peekByteOff Ptr b buffer Int pos :: IO Word8 IORef Int -> (Int -> Int) -> IO () forall a. IORef a -> (a -> a) -> IO () modifyIORef IORef Int bufferPos (Int -> Int -> Int forall a. Num a => a -> a -> a + Int 1) Word8 -> IO Word8 forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return Word8 b readVarIntFromBuffer :: (Integral a) => Ptr b -> IORef Int -> IO a readVarIntFromBuffer :: forall a b. Integral a => Ptr b -> IORef Int -> IO a readVarIntFromBuffer Ptr b buf IORef Int bufferPos = do Int start <- IORef Int -> IO Int forall a. IORef a -> IO a readIORef IORef Int bufferPos let loop :: t -> Int -> Integer -> IO Integer loop t i Int shift Integer result = do Word8 b <- IORef Int -> Ptr b -> IO Word8 forall b. IORef Int -> Ptr b -> IO Word8 readAndAdvance IORef Int bufferPos Ptr b buf let res :: Integer res = Integer result Integer -> Integer -> Integer forall a. Bits a => a -> a -> a .|. ((Word8 -> Integer forall a b. (Integral a, Num b) => a -> b fromIntegral (Word8 b Word8 -> Word8 -> Word8 forall a. Bits a => a -> a -> a .&. Word8 0x7f) :: Integer) Integer -> Int -> Integer forall a. Bits a => a -> Int -> a `shiftL` Int shift) if (Word8 b Word8 -> Word8 -> Word8 forall a. Bits a => a -> a -> a .&. Word8 0x80) Word8 -> Word8 -> Bool forall a. Eq a => a -> a -> Bool /= Word8 0x80 then Integer -> IO Integer forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return Integer res else t -> Int -> Integer -> IO Integer loop (t i t -> t -> t forall a. Num a => a -> a -> a + t 1) (Int shift Int -> Int -> Int forall a. Num a => a -> a -> a + Int 7) Integer res Integer -> a forall a b. (Integral a, Num b) => a -> b fromIntegral (Integer -> a) -> IO Integer -> IO a forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> Int -> Int -> Integer -> IO Integer forall {t}. Num t => t -> Int -> Integer -> IO Integer loop Int start Int 0 Integer 0 readIntFromBuffer :: (Integral a) => Ptr b -> IORef Int -> IO a readIntFromBuffer :: forall a b. Integral a => Ptr b -> IORef Int -> IO a readIntFromBuffer Ptr b buf IORef Int bufferPos = do Int32 n <- Ptr b -> IORef Int -> IO Int32 forall a b. Integral a => Ptr b -> IORef Int -> IO a readVarIntFromBuffer Ptr b buf IORef Int bufferPos let u :: Word32 u = Int32 -> Word32 forall a b. (Integral a, Num b) => a -> b fromIntegral Int32 n :: Word32 a -> IO a forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (a -> IO a) -> a -> IO a forall a b. (a -> b) -> a -> b $ Int32 -> a forall a b. (Integral a, Num b) => a -> b fromIntegral (Int32 -> a) -> Int32 -> a forall a b. (a -> b) -> a -> b $ (Word32 -> Int32 forall a b. (Integral a, Num b) => a -> b fromIntegral (Word32 u Word32 -> Int -> Word32 forall a. Bits a => a -> Int -> a `shiftR` Int 1) :: Int32) Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .^. (-(Int32 n Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .&. Int32 1)) readInt32FromBuffer :: Ptr b -> IORef Int -> IO Int32 readInt32FromBuffer :: forall b. Ptr b -> IORef Int -> IO Int32 readInt32FromBuffer Ptr b buf IORef Int bufferPos = do Int32 n <- (Int64 -> Int32 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int64 -> Int32) -> IO Int64 -> IO Int32 forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> forall a b. Integral a => Ptr b -> IORef Int -> IO a readVarIntFromBuffer @Int64 Ptr b buf IORef Int bufferPos) :: IO Int32 let u :: Word32 u = Int32 -> Word32 forall a b. (Integral a, Num b) => a -> b fromIntegral Int32 n :: Word32 Int32 -> IO Int32 forall a. a -> IO a forall (m :: * -> *) a. Monad m => a -> m a return (Int32 -> IO Int32) -> Int32 -> IO Int32 forall a b. (a -> b) -> a -> b $ (Word32 -> Int32 forall a b. (Integral a, Num b) => a -> b fromIntegral (Word32 u Word32 -> Int -> Word32 forall a. Bits a => a -> Int -> a `shiftR` Int 1) :: Int32) Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .^. (-(Int32 n Int32 -> Int32 -> Int32 forall a. Bits a => a -> a -> a .&. Int32 1)) readString :: Ptr Word8 -> IORef Int -> IO String readString :: Ptr Word8 -> IORef Int -> IO String readString Ptr Word8 buf IORef Int pos = do Int nameSize <- forall a b. Integral a => Ptr b -> IORef Int -> IO a readVarIntFromBuffer @Int Ptr Word8 buf IORef Int pos (Word8 -> Char) -> [Word8] -> String forall a b. (a -> b) -> [a] -> [b] map (Int -> Char chr (Int -> Char) -> (Word8 -> Int) -> Word8 -> Char forall b c a. (b -> c) -> (a -> b) -> a -> c . Word8 -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral) ([Word8] -> String) -> IO [Word8] -> IO String forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> Int -> IO Word8 -> IO [Word8] forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a] replicateM Int nameSize (IORef Int -> Ptr Word8 -> IO Word8 forall b. IORef Int -> Ptr b -> IO Word8 readAndAdvance IORef Int pos Ptr Word8 buf) readBytes :: Handle -> Int64 -> Int64 -> IO [Word8] readBytes :: Handle -> Int64 -> Int64 -> IO [Word8] readBytes Handle handle Int64 colStart Int64 colLen = do Ptr Word8 buf <- Int -> IO (Ptr Word8) forall a. Int -> IO (Ptr a) mallocBytes (Int64 -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral Int64 colLen) :: IO (Ptr Word8) Handle -> SeekMode -> Integer -> IO () hSeek Handle handle SeekMode AbsoluteSeek (Int64 -> Integer forall a b. (Integral a, Num b) => a -> b fromIntegral Int64 colStart) Int _ <- Handle -> Ptr Word8 -> Int -> IO Int forall a. Handle -> Ptr a -> Int -> IO Int hGetBuf Handle handle Ptr Word8 buf (Int64 -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral Int64 colLen) [Word8] columnBytes <- (Int -> IO Word8) -> [Int] -> IO [Word8] forall (t :: * -> *) (m :: * -> *) a b. (Traversable t, Monad m) => (a -> m b) -> t a -> m (t b) forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b] mapM (Ptr Word8 -> Int -> IO Word8 forall b. Ptr b -> Int -> IO Word8 forall a b. Storable a => Ptr b -> Int -> IO a peekByteOff Ptr Word8 buf) [Int 0 .. Int64 -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral Int64 colLen Int -> Int -> Int forall a. Num a => a -> a -> a - Int 1] Ptr Word8 -> IO () forall a. Ptr a -> IO () free Ptr Word8 buf [Word8] -> IO [Word8] forall a. a -> IO a forall (f :: * -> *) a. Applicative f => a -> f a pure [Word8] columnBytes numBytesInFile :: Handle -> IO Integer numBytesInFile :: Handle -> IO Integer numBytesInFile Handle handle = do Handle -> SeekMode -> Integer -> IO () hSeek Handle handle SeekMode SeekFromEnd Integer 0 Handle -> IO Integer hTell Handle handle readByteStringFromBytes :: [Word8] -> ([Word8], [Word8]) readByteStringFromBytes :: [Word8] -> ([Word8], [Word8]) readByteStringFromBytes [Word8] xs = let (Int size, [Word8] rem) = forall a. Integral a => [Word8] -> (a, [Word8]) readVarIntFromBytes @Int [Word8] xs in (Int -> [Word8] -> [Word8] forall a. Int -> [a] -> [a] take Int size [Word8] rem, Int -> [Word8] -> [Word8] forall a. Int -> [a] -> [a] drop Int size [Word8] rem) readByteString :: Ptr Word8 -> IORef Int -> IO [Word8] readByteString :: Ptr Word8 -> IORef Int -> IO [Word8] readByteString Ptr Word8 buf IORef Int pos = do Int size <- forall a b. Integral a => Ptr b -> IORef Int -> IO a readVarIntFromBuffer @Int Ptr Word8 buf IORef Int pos Int -> IO Word8 -> IO [Word8] forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a] replicateM Int size (IORef Int -> Ptr Word8 -> IO Word8 forall b. IORef Int -> Ptr b -> IO Word8 readAndAdvance IORef Int pos Ptr Word8 buf) readByteString' :: Ptr Word8 -> Int64 -> IO [Word8] readByteString' :: Ptr Word8 -> Int64 -> IO [Word8] readByteString' Ptr Word8 buf Int64 size = (Int64 -> IO Word8) -> [Int64] -> IO [Word8] forall (t :: * -> *) (m :: * -> *) a b. (Traversable t, Monad m) => (a -> m b) -> t a -> m (t b) forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b] mapM (Int64 -> Ptr Word8 -> IO Word8 forall b. Int64 -> Ptr b -> IO Word8 `readSingleByte` Ptr Word8 buf) [Int64 0 .. (Int64 size Int64 -> Int64 -> Int64 forall a. Num a => a -> a -> a - Int64 1)] readSingleByte :: Int64 -> Ptr b -> IO Word8 readSingleByte :: forall b. Int64 -> Ptr b -> IO Word8 readSingleByte Int64 pos Ptr b buffer = Ptr b -> Int -> IO Word8 forall b. Ptr b -> Int -> IO Word8 forall a b. Storable a => Ptr b -> Int -> IO a peekByteOff Ptr b buffer (Int64 -> Int forall a b. (Integral a, Num b) => a -> b fromIntegral Int64 pos) readNoAdvance :: IORef Int -> Ptr b -> IO Word8 readNoAdvance :: forall b. IORef Int -> Ptr b -> IO Word8 readNoAdvance IORef Int bufferPos Ptr b buffer = do Int pos <- IORef Int -> IO Int forall a. IORef a -> IO a readIORef IORef Int bufferPos Ptr b -> Int -> IO Word8 forall b. Ptr b -> Int -> IO Word8 forall a b. Storable a => Ptr b -> Int -> IO a peekByteOff Ptr b buffer Int pos :: IO Word8