{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TypeApplications #-}

{- | Simple column-oriented binary spill format (DFBN).

Layout (all integers little-endian):

@
[magic:       4  bytes] "DFBN"
[num_columns: 4  bytes] Word32
  per column:
    [name_len:  2  bytes] Word16  (byte length of UTF-8 name)
    [name:     name_len bytes]
    [type_tag:  1  byte]  Word8
[num_rows:    8  bytes] Word64

per column data block (order matches schema):
  type_tag 0 (Int):            num_rows × Int64 LE
  type_tag 1 (Double):         num_rows × Double LE (IEEE 754)
  type_tag 2 (Text):           (num_rows+1) × Word32 offsets  ++  payload bytes (UTF-8)
  type_tag 3 (Maybe Int):      ceil(num_rows/8)-byte null bitmap  ++  num_rows × Int64 LE
  type_tag 4 (Maybe Double):   ceil(num_rows/8)-byte null bitmap  ++  num_rows × Double LE
  type_tag 5 (Maybe Text):     ceil(num_rows/8)-byte null bitmap
                                ++  (num_rows+1) × Word32 offsets  ++  payload bytes
@

Null bitmap: bit @i@ of byte @i\/8@ is 1 when row @i@ is non-null.
-}
module DataFrame.Lazy.IO.Binary (
    spillToDisk,
    readSpilled,
    withSpilled,
) where

import Control.Exception (SomeException, bracket, try)
import Control.Monad (foldM, void, when)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BSB
import qualified Data.ByteString.Internal as BSI
import qualified Data.ByteString.Unsafe as BSU
import qualified Data.List as L
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Vector as V
import qualified Data.Vector.Storable as VS
import qualified Data.Vector.Unboxed as VU

import Data.Bits (setBit, shiftL, testBit, (.|.))
import Data.Maybe (fromMaybe, isJust)
import Data.Type.Equality (TestEquality (testEquality), type (:~:) (Refl))
import Data.Word (Word16, Word32, Word64, Word8)
import DataFrame.Internal.Column (Column (..))
import DataFrame.Internal.DataFrame (DataFrame (..))
import Foreign (ForeignPtr, castForeignPtr, plusForeignPtr, sizeOf)
import System.Directory (getTemporaryDirectory, removeFile)
import System.IO (IOMode (..), hClose, openTempFile, withFile)
import Type.Reflection (typeRep)

-- ---------------------------------------------------------------------------
-- Type tags
-- ---------------------------------------------------------------------------

tagInt, tagDouble, tagText, tagMaybeInt, tagMaybeDouble, tagMaybeText :: Word8
tagInt :: Word8
tagInt = Word8
0
tagDouble :: Word8
tagDouble = Word8
1
tagText :: Word8
tagText = Word8
2
tagMaybeInt :: Word8
tagMaybeInt = Word8
3
tagMaybeDouble :: Word8
tagMaybeDouble = Word8
4
tagMaybeText :: Word8
tagMaybeText = Word8
5

-- ---------------------------------------------------------------------------
-- Write
-- ---------------------------------------------------------------------------

-- | Serialise a 'DataFrame' to a DFBN binary file.
spillToDisk :: FilePath -> DataFrame -> IO ()
spillToDisk :: String -> DataFrame -> IO ()
spillToDisk String
path DataFrame
df =
    String -> IOMode -> (Handle -> IO ()) -> IO ()
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
withFile String
path IOMode
WriteMode ((Handle -> IO ()) -> IO ()) -> (Handle -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Handle
h -> Handle -> Builder -> IO ()
BSB.hPutBuilder Handle
h (DataFrame -> Builder
buildDataFrame DataFrame
df)

buildDataFrame :: DataFrame -> BSB.Builder
buildDataFrame :: DataFrame -> Builder
buildDataFrame DataFrame
df =
    StrictByteString -> Builder
BSB.byteString StrictByteString
"DFBN"
        Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word32 -> Builder
BSB.word32LE Word32
ncols
        Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ((Text, Column) -> Builder) -> [(Text, Column)] -> Builder
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ((Text -> Column -> Builder) -> (Text, Column) -> Builder
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> Column -> Builder
buildColumnSchema) ([Text] -> [Column] -> [(Text, Column)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
names [Column]
cols)
        Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word64 -> Builder
BSB.word64LE Word64
nrows
        Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (Column -> Builder) -> [Column] -> Builder
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (Int -> Column -> Builder
buildColumnData Int
nrowsInt) [Column]
cols
  where
    names :: [Text]
names =
        ((Text, Int) -> Text) -> [(Text, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap
            (Text, Int) -> Text
forall a b. (a, b) -> a
fst
            (((Text, Int) -> (Text, Int) -> Ordering)
-> [(Text, Int)] -> [(Text, Int)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
L.sortBy (\(Text, Int)
a (Text, Int)
b -> Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare ((Text, Int) -> Int
forall a b. (a, b) -> b
snd (Text, Int)
a) ((Text, Int) -> Int
forall a b. (a, b) -> b
snd (Text, Int)
b)) (Map Text Int -> [(Text, Int)]
forall k a. Map k a -> [(k, a)]
M.toList (DataFrame -> Map Text Int
columnIndices DataFrame
df)))
    ncols :: Word32
ncols = Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Text] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
names) :: Word32
    cols :: [Column]
cols = Vector Column -> [Column]
forall a. Vector a -> [a]
V.toList (DataFrame -> Vector Column
columns DataFrame
df)
    nrowsInt :: Int
nrowsInt = (Int, Int) -> Int
forall a b. (a, b) -> a
fst (DataFrame -> (Int, Int)
dataframeDimensions DataFrame
df)
    nrows :: Word64
nrows = Int -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nrowsInt :: Word64

buildColumnSchema :: T.Text -> Column -> BSB.Builder
buildColumnSchema :: Text -> Column -> Builder
buildColumnSchema Text
name Column
col =
    Word16 -> Builder
BSB.word16LE Word16
nameLen
        Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> StrictByteString -> Builder
BSB.byteString StrictByteString
nameBytes
        Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word8 -> Builder
BSB.word8 (Column -> Word8
columnTypeTag Column
col)
  where
    nameBytes :: StrictByteString
nameBytes = Text -> StrictByteString
TE.encodeUtf8 Text
name
    nameLen :: Word16
nameLen = Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int
BS.length StrictByteString
nameBytes) :: Word16

columnTypeTag :: Column -> Word8
columnTypeTag :: Column -> Word8
columnTypeTag (UnboxedColumn (Vector a
_ :: VU.Vector a)) =
    case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
        Just a :~: Int
Refl -> Word8
tagInt
        Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
            Just a :~: Double
Refl -> Word8
tagDouble
            Maybe (a :~: Double)
Nothing -> String -> Word8
forall a. HasCallStack => String -> a
error String
"spillToDisk: unsupported UnboxedColumn element type"
columnTypeTag (BoxedColumn Vector a
_) = Word8
tagText
columnTypeTag (OptionalColumn (Vector (Maybe a)
_ :: V.Vector (Maybe a))) =
    case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
        Just a :~: Int
Refl -> Word8
tagMaybeInt
        Maybe (a :~: Int)
Nothing -> case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
            Just a :~: Double
Refl -> Word8
tagMaybeDouble
            Maybe (a :~: Double)
Nothing -> Word8
tagMaybeText

buildColumnData :: Int -> Column -> BSB.Builder
buildColumnData :: Int -> Column -> Builder
buildColumnData Int
_ (UnboxedColumn (Vector a
v :: VU.Vector a)) =
    case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
        Just a :~: Int
Refl -> Vector Int -> Builder
buildIntVector Vector a
Vector Int
v
        Maybe (a :~: Int)
Nothing ->
            case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
                Just a :~: Double
Refl -> Vector Double -> Builder
buildDoubleVector Vector a
Vector Double
v
                Maybe (a :~: Double)
Nothing -> String -> Builder
forall a. HasCallStack => String -> a
error String
"spillToDisk: unsupported UnboxedColumn element type"
buildColumnData Int
_ (BoxedColumn (Vector a
v :: V.Vector a)) =
    case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
        Just a :~: Text
Refl -> Vector Text -> Builder
buildTextVector Vector a
Vector Text
v
        Maybe (a :~: Text)
Nothing -> String -> Builder
forall a. HasCallStack => String -> a
error String
"spillToDisk: unsupported BoxedColumn element type"
buildColumnData Int
_ (OptionalColumn (Vector (Maybe a)
v :: V.Vector (Maybe a))) =
    case TypeRep a -> TypeRep Int -> Maybe (a :~: Int)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Int) of
        Just a :~: Int
Refl ->
            Vector Bool -> Builder
buildNullBitmap ((Maybe a -> Bool) -> Vector (Maybe a) -> Vector Bool
forall a b. (a -> b) -> Vector a -> Vector b
V.map Maybe a -> Bool
forall a. Maybe a -> Bool
isJust Vector (Maybe a)
v)
                Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Vector Int -> Builder
buildIntVector (Vector Int -> Vector Int
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert ((Maybe Int -> Int) -> Vector (Maybe Int) -> Vector Int
forall a b. (a -> b) -> Vector a -> Vector b
V.map (Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
0) Vector (Maybe a)
Vector (Maybe Int)
v))
        Maybe (a :~: Int)
Nothing ->
            case TypeRep a -> TypeRep Double -> Maybe (a :~: Double)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @Double) of
                Just a :~: Double
Refl ->
                    Vector Bool -> Builder
buildNullBitmap ((Maybe a -> Bool) -> Vector (Maybe a) -> Vector Bool
forall a b. (a -> b) -> Vector a -> Vector b
V.map Maybe a -> Bool
forall a. Maybe a -> Bool
isJust Vector (Maybe a)
v)
                        Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Vector Double -> Builder
buildDoubleVector (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert ((Maybe Double -> Double) -> Vector (Maybe Double) -> Vector Double
forall a b. (a -> b) -> Vector a -> Vector b
V.map (Double -> Maybe Double -> Double
forall a. a -> Maybe a -> a
fromMaybe Double
0.0) Vector (Maybe a)
Vector (Maybe Double)
v))
                Maybe (a :~: Double)
Nothing ->
                    let showText :: a -> Text
showText a
x = case TypeRep a -> TypeRep Text -> Maybe (a :~: Text)
forall a b. TypeRep a -> TypeRep b -> Maybe (a :~: b)
forall {k} (f :: k -> *) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @a) (forall a. Typeable a => TypeRep a
forall {k} (a :: k). Typeable a => TypeRep a
typeRep @T.Text) of
                            Just a :~: Text
Refl -> a
Text
x
                            Maybe (a :~: Text)
Nothing -> String -> Text
T.pack (a -> String
forall a. Show a => a -> String
show a
x)
                        texts :: Vector Text
texts = (Maybe a -> Text) -> Vector (Maybe a) -> Vector Text
forall a b. (a -> b) -> Vector a -> Vector b
V.map (Text -> (a -> Text) -> Maybe a -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
T.empty a -> Text
showText) Vector (Maybe a)
v
                     in Vector Bool -> Builder
buildNullBitmap ((Maybe a -> Bool) -> Vector (Maybe a) -> Vector Bool
forall a b. (a -> b) -> Vector a -> Vector b
V.map Maybe a -> Bool
forall a. Maybe a -> Bool
isJust Vector (Maybe a)
v) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Vector Text -> Builder
buildTextVector Vector Text
texts

{- | Bulk-encode an Int vector as 8-byte LE values (native layout on LE platforms).
hPutBuilder flushes synchronously so the underlying ForeignPtr outlives the Builder.
-}
buildIntVector :: VU.Vector Int -> BSB.Builder
buildIntVector :: Vector Int -> Builder
buildIntVector Vector Int
v =
    let sv :: Vector Int
sv = Vector Int -> Vector Int
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert Vector Int
v :: VS.Vector Int
        (ForeignPtr Int
fp, Int
n) = Vector Int -> (ForeignPtr Int, Int)
forall a. Vector a -> (ForeignPtr a, Int)
VS.unsafeToForeignPtr0 Vector Int
sv
        bs :: StrictByteString
bs = ForeignPtr Word8 -> Int -> Int -> StrictByteString
BSI.fromForeignPtr (ForeignPtr Int -> ForeignPtr Word8
forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr ForeignPtr Int
fp) Int
0 (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int -> Int
forall a. Storable a => a -> Int
sizeOf (Int
0 :: Int))
     in StrictByteString -> Builder
BSB.byteString StrictByteString
bs

-- | Bulk-encode a Double vector as 8-byte LE IEEE 754 values (native layout on LE platforms).
buildDoubleVector :: VU.Vector Double -> BSB.Builder
buildDoubleVector :: Vector Double -> Builder
buildDoubleVector Vector Double
v =
    let sv :: Vector Double
sv = Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert Vector Double
v :: VS.Vector Double
        (ForeignPtr Double
fp, Int
n) = Vector Double -> (ForeignPtr Double, Int)
forall a. Vector a -> (ForeignPtr a, Int)
VS.unsafeToForeignPtr0 Vector Double
sv
        bs :: StrictByteString
bs = ForeignPtr Word8 -> Int -> Int -> StrictByteString
BSI.fromForeignPtr (ForeignPtr Double -> ForeignPtr Word8
forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr ForeignPtr Double
fp) Int
0 (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Double -> Int
forall a. Storable a => a -> Int
sizeOf (Double
0 :: Double))
     in StrictByteString -> Builder
BSB.byteString StrictByteString
bs

-- | Write a Text vector: (num_rows+1) Word32 offsets followed by UTF-8 payload.
buildTextVector :: V.Vector T.Text -> BSB.Builder
buildTextVector :: Vector Text -> Builder
buildTextVector Vector Text
v =
    (Word32 -> Builder) -> [Word32] -> Builder
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap Word32 -> Builder
BSB.word32LE [Word32]
offsets Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (StrictByteString -> Builder) -> [StrictByteString] -> Builder
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap StrictByteString -> Builder
BSB.byteString [StrictByteString]
encoded
  where
    encoded :: [StrictByteString]
encoded = Vector StrictByteString -> [StrictByteString]
forall a. Vector a -> [a]
V.toList ((Text -> StrictByteString)
-> Vector Text -> Vector StrictByteString
forall a b. (a -> b) -> Vector a -> Vector b
V.map Text -> StrictByteString
TE.encodeUtf8 Vector Text
v)
    offsets :: [Word32]
offsets = (Word32 -> StrictByteString -> Word32)
-> Word32 -> [StrictByteString] -> [Word32]
forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl (\Word32
acc StrictByteString
bs -> Word32
acc Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int
BS.length StrictByteString
bs)) (Word32
0 :: Word32) [StrictByteString]
encoded

-- | Build a null-validity bitmap: 1 bit per row, packed LSB-first into bytes.
buildNullBitmap :: V.Vector Bool -> BSB.Builder
buildNullBitmap :: Vector Bool -> Builder
buildNullBitmap Vector Bool
valids = (Int -> Builder) -> [Int] -> Builder
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (Word8 -> Builder
BSB.word8 (Word8 -> Builder) -> (Int -> Word8) -> Int -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8
mkByte) [Int
0 .. Int
numBytes Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
  where
    n :: Int
n = Vector Bool -> Int
forall a. Vector a -> Int
V.length Vector Bool
valids
    numBytes :: Int
numBytes = (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
8
    mkByte :: Int -> Word8
mkByte Int
byteIdx =
        (Int -> Word8 -> Word8) -> Word8 -> [Int] -> Word8
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr
            ( \Int
bit Word8
acc ->
                let row :: Int
row = Int
byteIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
bit
                 in if Int
row Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n Bool -> Bool -> Bool
&& (Vector Bool
valids Vector Bool -> Int -> Bool
forall a. Vector a -> Int -> a
V.! Int
row) then Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
setBit Word8
acc Int
bit else Word8
acc
            )
            (Word8
0 :: Word8)
            [Int
0 .. Int
7]

-- ---------------------------------------------------------------------------
-- Read
-- ---------------------------------------------------------------------------

-- | @(new_offset, value)@
type ParseResult a = Either String (Int, a)

-- | Deserialise a DFBN binary file into a 'DataFrame'.
readSpilled :: FilePath -> IO DataFrame
readSpilled :: String -> IO DataFrame
readSpilled String
path = do
    StrictByteString
bs <- String -> IO StrictByteString
BS.readFile String
path
    case StrictByteString -> Int -> ParseResult DataFrame
parseDataFrame StrictByteString
bs Int
0 of
        Left String
err -> String -> IO DataFrame
forall a. String -> IO a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"readSpilled: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
err)
        Right (Int
_, DataFrame
df) -> DataFrame -> IO DataFrame
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return DataFrame
df

parseDataFrame :: BS.ByteString -> Int -> ParseResult DataFrame
parseDataFrame :: StrictByteString -> Int -> ParseResult DataFrame
parseDataFrame StrictByteString
bs Int
off0 = do
    (Int
off1, StrictByteString
magic) <- StrictByteString -> Int -> Int -> ParseResult StrictByteString
readBytes StrictByteString
bs Int
off0 Int
4
    Bool -> Either String () -> Either String ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (StrictByteString
magic StrictByteString -> StrictByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= StrictByteString
"DFBN") (Either String () -> Either String ())
-> Either String () -> Either String ()
forall a b. (a -> b) -> a -> b
$ String -> Either String ()
forall a b. a -> Either a b
Left String
"bad magic bytes"
    (Int
off2, Word32
ncols) <- StrictByteString -> Int -> ParseResult Word32
readWord32LE StrictByteString
bs Int
off1
    let ncolsInt :: Int
ncolsInt = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
ncols :: Int
    (Int
off3, [(Text, Word8)]
schema) <- Int
-> (Int -> ParseResult (Text, Word8))
-> Int
-> ParseResult [(Text, Word8)]
forall a. Int -> (Int -> ParseResult a) -> Int -> ParseResult [a]
readN Int
ncolsInt (StrictByteString -> Int -> ParseResult (Text, Word8)
readColumnSchema StrictByteString
bs) Int
off2
    (Int
off4, Word64
nrows64) <- StrictByteString -> Int -> ParseResult Word64
readWord64LE StrictByteString
bs Int
off3
    let nrows :: Int
nrows = Word64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word64
nrows64 :: Int
    (Int
off5, [Column]
cols) <-
        ((Int, [Column]) -> (Text, Word8) -> Either String (Int, [Column]))
-> (Int, [Column])
-> [(Text, Word8)]
-> Either String (Int, [Column])
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM
            ( \(Int
o, [Column]
acc) (Text
_, Word8
tag) -> do
                (Int
o', Column
col) <- StrictByteString -> Int -> Int -> Word8 -> ParseResult Column
readColumnData StrictByteString
bs Int
o Int
nrows Word8
tag
                (Int, [Column]) -> Either String (Int, [Column])
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
o', [Column]
acc [Column] -> [Column] -> [Column]
forall a. [a] -> [a] -> [a]
++ [Column
col])
            )
            (Int
off4, [])
            [(Text, Word8)]
schema
    let names :: [Text]
names = ((Text, Word8) -> Text) -> [(Text, Word8)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, Word8) -> Text
forall a b. (a, b) -> a
fst [(Text, Word8)]
schema
    (Int, DataFrame) -> ParseResult DataFrame
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return
        ( Int
off5
        , DataFrame
            { columns :: Vector Column
columns = [Column] -> Vector Column
forall a. [a] -> Vector a
V.fromList [Column]
cols
            , columnIndices :: Map Text Int
columnIndices = [(Text, Int)] -> Map Text Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([Text] -> [Int] -> [(Text, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
names [Int
0 ..])
            , dataframeDimensions :: (Int, Int)
dataframeDimensions = (Int
nrows, Int
ncolsInt)
            , derivingExpressions :: Map Text UExpr
derivingExpressions = Map Text UExpr
forall k a. Map k a
M.empty
            }
        )

readColumnSchema :: BS.ByteString -> Int -> ParseResult (T.Text, Word8)
readColumnSchema :: StrictByteString -> Int -> ParseResult (Text, Word8)
readColumnSchema StrictByteString
bs Int
off = do
    (Int
off1, Word16
nameLen) <- StrictByteString -> Int -> ParseResult Word16
readWord16LE StrictByteString
bs Int
off
    let nameLenInt :: Int
nameLenInt = Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
nameLen :: Int
    (Int
off2, StrictByteString
nameBytes) <- StrictByteString -> Int -> Int -> ParseResult StrictByteString
readBytes StrictByteString
bs Int
off1 Int
nameLenInt
    (Int
off3, Word8
tag) <- StrictByteString -> Int -> ParseResult Word8
readWord8 StrictByteString
bs Int
off2
    (Int, (Text, Word8)) -> ParseResult (Text, Word8)
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off3, (StrictByteString -> Text
TE.decodeUtf8 StrictByteString
nameBytes, Word8
tag))

readColumnData :: BS.ByteString -> Int -> Int -> Word8 -> ParseResult Column
readColumnData :: StrictByteString -> Int -> Int -> Word8 -> ParseResult Column
readColumnData StrictByteString
bs Int
off Int
nrows Word8
tag
    | Word8
tag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
tagInt = do
        (Int
off', Vector Int
v) <- StrictByteString -> Int -> Int -> ParseResult (Vector Int)
readIntColumn StrictByteString
bs Int
off Int
nrows
        (Int, Column) -> ParseResult Column
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off', Vector Int -> Column
forall a. (Columnable a, Unbox a) => Vector a -> Column
UnboxedColumn Vector Int
v)
    | Word8
tag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
tagDouble = do
        (Int
off', Vector Double
v) <- StrictByteString -> Int -> Int -> ParseResult (Vector Double)
readDoubleColumn StrictByteString
bs Int
off Int
nrows
        (Int, Column) -> ParseResult Column
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off', Vector Double -> Column
forall a. (Columnable a, Unbox a) => Vector a -> Column
UnboxedColumn Vector Double
v)
    | Word8
tag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
tagText = do
        (Int
off', Vector Text
v) <- StrictByteString -> Int -> Int -> ParseResult (Vector Text)
readTextColumn StrictByteString
bs Int
off Int
nrows
        (Int, Column) -> ParseResult Column
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off', Vector Text -> Column
forall a. Columnable a => Vector a -> Column
BoxedColumn Vector Text
v)
    | Word8
tag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
tagMaybeInt = do
        (Int
off1, [Bool]
bitmap) <- StrictByteString -> Int -> Int -> ParseResult [Bool]
readNullBitmap StrictByteString
bs Int
off Int
nrows
        (Int
off2, Vector Int
v) <- StrictByteString -> Int -> Int -> ParseResult (Vector Int)
readIntColumn StrictByteString
bs Int
off1 Int
nrows
        let maybes :: Vector (Maybe Int)
maybes =
                [Maybe Int] -> Vector (Maybe Int)
forall a. [a] -> Vector a
V.fromList
                    ((Bool -> Int -> Maybe Int) -> [Bool] -> [Int] -> [Maybe Int]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Bool
valid Int
x -> if Bool
valid then Int -> Maybe Int
forall a. a -> Maybe a
Just Int
x else Maybe Int
forall a. Maybe a
Nothing) [Bool]
bitmap (Vector Int -> [Int]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Int
v)) ::
                    V.Vector (Maybe Int)
        (Int, Column) -> ParseResult Column
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off2, Vector (Maybe Int) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn Vector (Maybe Int)
maybes)
    | Word8
tag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
tagMaybeDouble = do
        (Int
off1, [Bool]
bitmap) <- StrictByteString -> Int -> Int -> ParseResult [Bool]
readNullBitmap StrictByteString
bs Int
off Int
nrows
        (Int
off2, Vector Double
v) <- StrictByteString -> Int -> Int -> ParseResult (Vector Double)
readDoubleColumn StrictByteString
bs Int
off1 Int
nrows
        let maybes :: Vector (Maybe Double)
maybes =
                [Maybe Double] -> Vector (Maybe Double)
forall a. [a] -> Vector a
V.fromList
                    ((Bool -> Double -> Maybe Double)
-> [Bool] -> [Double] -> [Maybe Double]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Bool
valid Double
x -> if Bool
valid then Double -> Maybe Double
forall a. a -> Maybe a
Just Double
x else Maybe Double
forall a. Maybe a
Nothing) [Bool]
bitmap (Vector Double -> [Double]
forall a. Unbox a => Vector a -> [a]
VU.toList Vector Double
v)) ::
                    V.Vector (Maybe Double)
        (Int, Column) -> ParseResult Column
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off2, Vector (Maybe Double) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn Vector (Maybe Double)
maybes)
    | Word8
tag Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
tagMaybeText = do
        (Int
off1, [Bool]
bitmap) <- StrictByteString -> Int -> Int -> ParseResult [Bool]
readNullBitmap StrictByteString
bs Int
off Int
nrows
        (Int
off2, Vector Text
v) <- StrictByteString -> Int -> Int -> ParseResult (Vector Text)
readTextColumn StrictByteString
bs Int
off1 Int
nrows
        let maybes :: Vector (Maybe Text)
maybes =
                [Maybe Text] -> Vector (Maybe Text)
forall a. [a] -> Vector a
V.fromList
                    ((Bool -> Text -> Maybe Text) -> [Bool] -> [Text] -> [Maybe Text]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\Bool
valid Text
x -> if Bool
valid then Text -> Maybe Text
forall a. a -> Maybe a
Just Text
x else Maybe Text
forall a. Maybe a
Nothing) [Bool]
bitmap (Vector Text -> [Text]
forall a. Vector a -> [a]
V.toList Vector Text
v)) ::
                    V.Vector (Maybe T.Text)
        (Int, Column) -> ParseResult Column
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off2, Vector (Maybe Text) -> Column
forall a. Columnable a => Vector (Maybe a) -> Column
OptionalColumn Vector (Maybe Text)
maybes)
    | Bool
otherwise = String -> ParseResult Column
forall a b. a -> Either a b
Left (String
"unknown type tag " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Word8 -> String
forall a. Show a => a -> String
show Word8
tag)

{- | Zero-copy Int column read: reuses the ByteString buffer's ForeignPtr.
Safe as long as 'bs' stays live during the caller's use of the resulting vector.
Only correct on little-endian platforms (aarch64/x86_64).
-}
readIntColumn :: BS.ByteString -> Int -> Int -> ParseResult (VU.Vector Int)
readIntColumn :: StrictByteString -> Int -> Int -> ParseResult (Vector Int)
readIntColumn StrictByteString
bs Int
off Int
nrows
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nrows Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult (Vector Int)
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise =
        let (ForeignPtr Word8
fp, Int
bsOff, Int
_) = StrictByteString -> (ForeignPtr Word8, Int, Int)
BSI.toForeignPtr StrictByteString
bs
            fp' :: ForeignPtr Int
fp' = ForeignPtr Any -> ForeignPtr Int
forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr (ForeignPtr Word8 -> Int -> ForeignPtr Any
forall a b. ForeignPtr a -> Int -> ForeignPtr b
plusForeignPtr ForeignPtr Word8
fp (Int
bsOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
off)) :: ForeignPtr Int
            sv :: Vector Int
sv = ForeignPtr Int -> Int -> Vector Int
forall a. ForeignPtr a -> Int -> Vector a
VS.unsafeFromForeignPtr0 ForeignPtr Int
fp' Int
nrows :: VS.Vector Int
         in (Int, Vector Int) -> ParseResult (Vector Int)
forall a b. b -> Either a b
Right (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nrows Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8, Vector Int -> Vector Int
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert Vector Int
sv)

{- | Zero-copy Double column read: reuses the ByteString buffer's ForeignPtr.
Safe as long as 'bs' stays live during the caller's use of the resulting vector.
Only correct on little-endian platforms (aarch64/x86_64).
-}
readDoubleColumn ::
    BS.ByteString -> Int -> Int -> ParseResult (VU.Vector Double)
readDoubleColumn :: StrictByteString -> Int -> Int -> ParseResult (Vector Double)
readDoubleColumn StrictByteString
bs Int
off Int
nrows
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nrows Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult (Vector Double)
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise =
        let (ForeignPtr Word8
fp, Int
bsOff, Int
_) = StrictByteString -> (ForeignPtr Word8, Int, Int)
BSI.toForeignPtr StrictByteString
bs
            fp' :: ForeignPtr Double
fp' = ForeignPtr Any -> ForeignPtr Double
forall a b. ForeignPtr a -> ForeignPtr b
castForeignPtr (ForeignPtr Word8 -> Int -> ForeignPtr Any
forall a b. ForeignPtr a -> Int -> ForeignPtr b
plusForeignPtr ForeignPtr Word8
fp (Int
bsOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
off)) :: ForeignPtr Double
            sv :: Vector Double
sv = ForeignPtr Double -> Int -> Vector Double
forall a. ForeignPtr a -> Int -> Vector a
VS.unsafeFromForeignPtr0 ForeignPtr Double
fp' Int
nrows :: VS.Vector Double
         in (Int, Vector Double) -> ParseResult (Vector Double)
forall a b. b -> Either a b
Right (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nrows Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8, Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert Vector Double
sv)

readTextColumn :: BS.ByteString -> Int -> Int -> ParseResult (V.Vector T.Text)
readTextColumn :: StrictByteString -> Int -> Int -> ParseResult (Vector Text)
readTextColumn StrictByteString
bs Int
off Int
nrows = do
    [Word32]
offsets <- StrictByteString -> Int -> Int -> Either String [Word32]
readWord32Array StrictByteString
bs Int
off (Int
nrows Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    let payloadStart :: Int
payloadStart = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
nrows Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
4
        totalPayload :: Int
totalPayload = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Word32] -> Word32
forall a. HasCallStack => [a] -> a
last [Word32]
offsets) :: Int
    Bool -> Either String () -> Either String ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
payloadStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
totalPayload Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs) (Either String () -> Either String ())
-> Either String () -> Either String ()
forall a b. (a -> b) -> a -> b
$
        String -> Either String ()
forall a b. a -> Either a b
Left String
"unexpected end of input"
    let sizes :: [Int]
sizes =
            (Word32 -> Word32 -> Int) -> [Word32] -> [Word32] -> [Int]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith
                (\Word32
a Word32
b -> Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
b Int -> Int -> Int
forall a. Num a => a -> a -> a
- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
a :: Int)
                [Word32]
offsets
                (Int -> [Word32] -> [Word32]
forall a. Int -> [a] -> [a]
drop Int
1 [Word32]
offsets)
        texts :: [Text]
texts =
            (Word32 -> Int -> Text) -> [Word32] -> [Int] -> [Text]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith
                ( \Word32
o Int
sz ->
                    StrictByteString -> Text
TE.decodeUtf8
                        (Int -> StrictByteString -> StrictByteString
BS.take Int
sz (Int -> StrictByteString -> StrictByteString
BS.drop (Int
payloadStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
o) StrictByteString
bs))
                )
                [Word32]
offsets
                [Int]
sizes
    (Int, Vector Text) -> ParseResult (Vector Text)
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
payloadStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
totalPayload, [Text] -> Vector Text
forall a. [a] -> Vector a
V.fromList [Text]
texts)

-- | Read @nrows@ null-bitmap bits (ceil(nrows\/8) bytes).
readNullBitmap :: BS.ByteString -> Int -> Int -> ParseResult [Bool]
readNullBitmap :: StrictByteString -> Int -> Int -> ParseResult [Bool]
readNullBitmap StrictByteString
bs Int
off Int
nrows
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
numBytes Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult [Bool]
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise =
        (Int, [Bool]) -> ParseResult [Bool]
forall a b. b -> Either a b
Right
            ( Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
numBytes
            , Int -> [Bool] -> [Bool]
forall a. Int -> [a] -> [a]
take
                Int
nrows
                [ Word8 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
row Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
8)) (Int
row Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
8)
                | Int
row <- [Int
0 ..]
                ]
            )
  where
    numBytes :: Int
numBytes = (Int
nrows Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
8

readWord8 :: BS.ByteString -> Int -> ParseResult Word8
readWord8 :: StrictByteString -> Int -> ParseResult Word8
readWord8 StrictByteString
bs Int
off
    | Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult Word8
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise = (Int, Word8) -> ParseResult Word8
forall a b. b -> Either a b
Right (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs Int
off)

readWord16LE :: BS.ByteString -> Int -> ParseResult Word16
readWord16LE :: StrictByteString -> Int -> ParseResult Word16
readWord16LE StrictByteString
bs Int
off
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult Word16
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise =
        let b0 :: Word16
b0 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs Int
off) :: Word16
            b1 :: Word16
b1 = Word8 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) :: Word16
         in (Int, Word16) -> ParseResult Word16
forall a b. b -> Either a b
Right (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2, Word16
b0 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. (Word16
b1 Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftL` Int
8))

readWord32LE :: BS.ByteString -> Int -> ParseResult Word32
readWord32LE :: StrictByteString -> Int -> ParseResult Word32
readWord32LE StrictByteString
bs Int
off
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
4 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult Word32
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise =
        let b0 :: Word32
b0 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs Int
off) :: Word32
            b1 :: Word32
b1 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) :: Word32
            b2 :: Word32
b2 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)) :: Word32
            b3 :: Word32
b3 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)) :: Word32
         in (Int, Word32) -> ParseResult Word32
forall a b. b -> Either a b
Right
                (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
4, Word32
b0 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word32
b1 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word32
b2 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
16) Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word32
b3 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
24))

readWord64LE :: BS.ByteString -> Int -> ParseResult Word64
readWord64LE :: StrictByteString -> Int -> ParseResult Word64
readWord64LE StrictByteString
bs Int
off
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult Word64
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise =
        let b0 :: Word64
b0 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs Int
off) :: Word64
            b1 :: Word64
b1 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) :: Word64
            b2 :: Word64
b2 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)) :: Word64
            b3 :: Word64
b3 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)) :: Word64
            b4 :: Word64
b4 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
4)) :: Word64
            b5 :: Word64
b5 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
5)) :: Word64
            b6 :: Word64
b6 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
6)) :: Word64
            b7 :: Word64
b7 = Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7)) :: Word64
         in (Int, Word64) -> ParseResult Word64
forall a b. b -> Either a b
Right
                ( Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8
                , Word64
b0
                    Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
b1 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
8)
                    Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
b2 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
16)
                    Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
b3 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
24)
                    Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
b4 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
32)
                    Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
b5 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
40)
                    Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
b6 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
48)
                    Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|. (Word64
b7 Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
`shiftL` Int
56)
                )

-- | Read @n@ consecutive Word32LE values starting at offset @off@.
readWord32Array :: BS.ByteString -> Int -> Int -> Either String [Word32]
readWord32Array :: StrictByteString -> Int -> Int -> Either String [Word32]
readWord32Array StrictByteString
bs Int
off Int
n
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
4 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> Either String [Word32]
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise =
        [Word32] -> Either String [Word32]
forall a b. b -> Either a b
Right
            [ let i :: Int
i = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
4
                  b0 :: Word32
b0 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs Int
i) :: Word32
                  b1 :: Word32
b1 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)) :: Word32
                  b2 :: Word32
b2 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)) :: Word32
                  b3 :: Word32
b3 = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (StrictByteString -> Int -> Word8
BSU.unsafeIndex StrictByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
3)) :: Word32
               in Word32
b0 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word32
b1 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word32
b2 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
16) Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.|. (Word32
b3 Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` Int
24)
            | Int
k <- [Int
0 .. Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
            ]

-- | Read @n@ bytes from @bs@ at @off@.
readBytes :: BS.ByteString -> Int -> Int -> ParseResult BS.ByteString
readBytes :: StrictByteString -> Int -> Int -> ParseResult StrictByteString
readBytes StrictByteString
bs Int
off Int
n
    | Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> StrictByteString -> Int
BS.length StrictByteString
bs = String -> ParseResult StrictByteString
forall a b. a -> Either a b
Left String
"unexpected end of input"
    | Bool
otherwise = (Int, StrictByteString) -> ParseResult StrictByteString
forall a b. b -> Either a b
Right (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n, Int -> StrictByteString -> StrictByteString
BS.take Int
n (Int -> StrictByteString -> StrictByteString
BS.drop Int
off StrictByteString
bs))

-- | Apply @f@ @n@ times sequentially, threading the offset.
readN :: Int -> (Int -> ParseResult a) -> Int -> ParseResult [a]
readN :: forall a. Int -> (Int -> ParseResult a) -> Int -> ParseResult [a]
readN Int
0 Int -> ParseResult a
_ Int
off = (Int, [a]) -> Either String (Int, [a])
forall a b. b -> Either a b
Right (Int
off, [])
readN Int
n Int -> ParseResult a
f Int
off = do
    (Int
off', a
x) <- Int -> ParseResult a
f Int
off
    (Int
off'', [a]
xs) <- Int -> (Int -> ParseResult a) -> Int -> Either String (Int, [a])
forall a. Int -> (Int -> ParseResult a) -> Int -> ParseResult [a]
readN (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> ParseResult a
f Int
off'
    (Int, [a]) -> Either String (Int, [a])
forall a. a -> Either String a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
off'', a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: [a]
xs)

-- ---------------------------------------------------------------------------
-- Bracket helper
-- ---------------------------------------------------------------------------

{- | Spill a DataFrame to a temporary file, run an action with the path,
then delete the file even if the action throws.
-}
withSpilled :: DataFrame -> (FilePath -> IO a) -> IO a
withSpilled :: forall a. DataFrame -> (String -> IO a) -> IO a
withSpilled DataFrame
df String -> IO a
action = do
    String
tmpDir <- IO String
getTemporaryDirectory
    IO String -> (String -> IO ()) -> (String -> IO a) -> IO a
forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracket
        ( do
            (String
path, Handle
h) <- String -> String -> IO (String, Handle)
openTempFile String
tmpDir String
"dataframe_spill.dfbn"
            Handle -> IO ()
hClose Handle
h
            String -> DataFrame -> IO ()
spillToDisk String
path DataFrame
df
            String -> IO String
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return String
path
        )
        (\String
path -> IO (Either SomeException ()) -> IO ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (IO () -> IO (Either SomeException ())
forall e a. Exception e => IO a -> IO (Either e a)
try (String -> IO ()
removeFile String
path) :: IO (Either SomeException ())))
        String -> IO a
action