| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
DataFrame.Lazy.IO.CSV
Synopsis
- data ReadOptions = ReadOptions {}
- defaultOptions :: ReadOptions
- readCsv :: FilePath -> IO DataFrame
- readTsv :: FilePath -> IO DataFrame
- readSeparated :: Char -> ReadOptions -> FilePath -> IO (DataFrame, (Integer, Text, Int))
- getInitialDataVectors :: Int -> IOVector MutableColumn -> [Text] -> IO ()
- fillColumns :: Int -> Char -> IOVector MutableColumn -> IOVector [(Int, Text)] -> Text -> Handle -> IO (Text, Int)
- writeValue :: IOVector MutableColumn -> IOVector [(Int, Text)] -> Int -> Int -> Text -> IO ()
- freezeColumn :: IOVector MutableColumn -> Vector [(Int, Text)] -> ReadOptions -> Int -> IO Column
- openCsvStream :: Char -> Schema -> FilePath -> IO (Handle, [(Int, Text, SchemaType)])
- readBatch :: Char -> [(Int, Text, SchemaType)] -> Int -> ByteString -> Handle -> IO (Maybe (DataFrame, ByteString))
- writeColumnBs :: Int -> ByteString -> MutableColumn -> IO (Either Text Bool)
- getNthFieldBs :: Word8 -> Int -> ByteString -> ByteString
- makeCol :: Int -> SchemaType -> IO MutableColumn
- sliceCol :: Int -> MutableColumn -> MutableColumn
- findUnquotedNewline :: ByteString -> Maybe Int
Documentation
data ReadOptions Source #
Record for CSV read options.
defaultOptions :: ReadOptions Source #
By default, we assume the file has a header, infer the types on read, and convert any rows with nullish objects into Maybe (safeRead).
readCsv :: FilePath -> IO DataFrame Source #
Reads a CSV file from the given path. Note that this function stores intermediate temporary files while converting the CSV from a row-oriented to a columnar format.
readTsv :: FilePath -> IO DataFrame Source #
Reads a tab-separated file from the given path. Note that this function stores intermediate temporary files while converting the file from a row-oriented to a columnar format.
readSeparated :: Char -> ReadOptions -> FilePath -> IO (DataFrame, (Integer, Text, Int)) Source #
Reads a character-separated file into a dataframe using mutable vectors.
getInitialDataVectors :: Int -> IOVector MutableColumn -> [Text] -> IO () Source #
fillColumns :: Int -> Char -> IOVector MutableColumn -> IOVector [(Int, Text)] -> Text -> Handle -> IO (Text, Int) Source #
Reads rows from the handle and stores values in mutable vectors.
writeValue :: IOVector MutableColumn -> IOVector [(Int, Text)] -> Int -> Int -> Text -> IO () Source #
Writes a value into the appropriate column, resizing the vector if necessary.
freezeColumn :: IOVector MutableColumn -> Vector [(Int, Text)] -> ReadOptions -> Int -> IO Column Source #
Freezes a mutable vector into an immutable one, trimming it to the actual row count.
openCsvStream :: Char -> Schema -> FilePath -> IO (Handle, [(Int, Text, SchemaType)]) Source #
Open a CSV/separated file for streaming, returning an open handle (positioned just after the header line) and the column specification for the schema columns that appear in the file header.
The caller is responsible for closing the handle when done.
readBatch :: Char -> [(Int, Text, SchemaType)] -> Int -> ByteString -> Handle -> IO (Maybe (DataFrame, ByteString)) Source #
writeColumnBs :: Int -> ByteString -> MutableColumn -> IO (Either Text Bool) Source #
Write a ByteString field value directly into a mutable column,
parsing numerics without an intermediate Text allocation.
getNthFieldBs :: Word8 -> Int -> ByteString -> ByteString Source #
Extracts the Nth field (0-indexed), respecting double quotes and stripping them.
Fast path: uses memchr-based break when no quotes are present in the line.
Slow path: quote-aware character-by-character scan.
makeCol :: Int -> SchemaType -> IO MutableColumn Source #
Allocate a fresh MutableColumn for n slots based on a SchemaType.
sliceCol :: Int -> MutableColumn -> MutableColumn Source #
Slice a MutableColumn to n elements (no-copy view).
findUnquotedNewline :: ByteString -> Maybe Int Source #
Finds the index of the next unquoted newline (0x0A). Fast path: uses memchr (SIMD) and falls back to a quote-aware linear scan only if a double-quote appears before the candidate newline.