| Safe Haskell | None |
|---|---|
| Language | Haskell2010 |
Data.Csv
Contents
Description
This module implements encoding and decoding of CSV data. The implementation is RFC 4180 compliant, with the following extensions:
- Empty lines are ignored.
- Non-escaped fields may contain any characters except double-quotes, commas, carriage returns, and newlines.
- Escaped fields may contain any characters (but double-quotes need to be escaped).
- data HasHeader
- decode :: FromRecord a => HasHeader -> ByteString -> Either String (Vector a)
- decodeByName :: FromNamedRecord a => ByteString -> Either String (Header, Vector a)
- encode :: ToRecord a => [a] -> ByteString
- encodeByName :: ToNamedRecord a => Header -> [a] -> ByteString
- encodeDefaultOrderedByName :: (DefaultOrdered a, ToNamedRecord a) => [a] -> ByteString
- class DefaultOrdered a where
- data DecodeOptions = DecodeOptions {
- decDelimiter :: !Word8
- defaultDecodeOptions :: DecodeOptions
- decodeWith :: FromRecord a => DecodeOptions -> HasHeader -> ByteString -> Either String (Vector a)
- decodeByNameWith :: FromNamedRecord a => DecodeOptions -> ByteString -> Either String (Header, Vector a)
- data EncodeOptions = EncodeOptions {
- encDelimiter :: !Word8
- encUseCrLf :: !Bool
- encIncludeHeader :: !Bool
- encQuoting :: !Quoting
- data Quoting
- defaultEncodeOptions :: EncodeOptions
- encodeWith :: ToRecord a => EncodeOptions -> [a] -> ByteString
- encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> [a] -> ByteString
- encodeDefaultOrderedByNameWith :: forall a. (DefaultOrdered a, ToNamedRecord a) => EncodeOptions -> [a] -> ByteString
- type Csv = Vector Record
- type Record = Vector Field
- type Field = ByteString
- type Header = Vector Name
- type Name = ByteString
- type NamedRecord = HashMap ByteString ByteString
- class FromRecord a where
- data Parser a
- runParser :: Parser a -> Either String a
- index :: FromField a => Record -> Int -> Parser a
- (.!) :: FromField a => Record -> Int -> Parser a
- unsafeIndex :: FromField a => Record -> Int -> Parser a
- class ToRecord a where
- record :: [ByteString] -> Record
- newtype Only a :: * -> * = Only {
- fromOnly :: a
- class FromNamedRecord a where
- lookup :: FromField a => NamedRecord -> ByteString -> Parser a
- (.:) :: FromField a => NamedRecord -> ByteString -> Parser a
- class ToNamedRecord a where
- namedRecord :: [(ByteString, ByteString)] -> NamedRecord
- namedField :: ToField a => ByteString -> a -> (ByteString, ByteString)
- (.=) :: ToField a => ByteString -> a -> (ByteString, ByteString)
- header :: [ByteString] -> Header
- class FromField a where
- class ToField a where
Usage examples
Encoding standard Haskell types:
>>> encode [("John" :: Text, 27), ("Jane", 28)]
"John,27\r\nJane,28\r\n"Since string literals are overloaded we have to supply a type
signature as the compiler couldn't deduce which string type (i.e.
String or Text) we want to use. In most cases type inference
will infer the type from the context and you can omit type
signatures.
Decoding standard Haskell types:
>>> decode NoHeader "John,27\r\nJane,28\r\n" :: Either String (Vector (Text, Int))
Right [("John",27),("Jane",28)]We pass NoHeader as the first argument to indicate that the CSV
input data isn't preceded by a header.
In practice, the return type of decode rarely needs to be given,
as it can often be inferred from the context.
Encoding and decoding custom data types
To encode and decode your own data types you need to defined
instances of either ToRecord and FromRecord or ToNamedRecord
and FromNamedRecord. The former is used for encoding/decoding
using the column index and the latter using the column name.
There are two ways to to define these instances, either by manually defining them or by using GHC generics to derive them automatically.
Index-based record conversion
Derived:
{-# LANGUAGE DeriveGeneric #-}
data Person = Person { name :: !Text , salary :: !Int }
deriving Generic
instance FromRecord Person
instance ToRecord PersonManually defined:
data Person = Person { name :: !Text , salary :: !Int }
instance FromRecord Person where
parseRecord v
| length v == 2 = Person <$> v .! 0 <*> v .! 1
| otherwise = mzero
instance ToRecord Person where
toRecord (Person name age) = record [
toField name, toField age]We can now use e.g. encode and decode to encode and decode our
data type.
Encoding:
>>> encode [Person ("John" :: Text) 27]
"John,27\r\n"Decoding:
>>> decode NoHeader "John,27\r\n" :: Either String (Vector Person)
Right [Person {name = "John", salary = 27}]Name-based record conversion
Derived:
{-# LANGUAGE DeriveGeneric #-}
data Person = Person { name :: !Text , salary :: !Int }
deriving Generic
instance FromNamedRecord Person
instance ToNamedRecord Person
instance DefaultOrdered PersonManually defined:
data Person = Person { name :: !Text , salary :: !Int }
instance FromNamedRecord Person where
parseNamedRecord m = Person <$> m .: "name" <*> m .: "salary"
instance ToNamedRecord Person where
toNamedRecord (Person name salary) = namedRecord [
"name" .= name, "salary" .= salary]
instance DefaultOrdered Person where
headerOrder _ = header ["name", "salary"]We can now use e.g. encodeDefaultOrderedByName (or encodeByName
with an explicit header order) and decodeByName to encode and
decode our data type.
Encoding:
>>> encodeDefaultOrderedByName [Person ("John" :: Text) 27]
"name,salary\r\nJohn,27\r\n"Decoding:
>>> decodeByName "name,salary\r\nJohn,27\r\n" :: Either String (Header, Vector Person)
Right (["name","salary"],[Person {name = "John", salary = 27}])Treating CSV data as opaque byte strings
Sometimes you might want to work with a CSV file which contents is
unknown to you. For example, you might want remove the second
column of a file without knowing anything about its content. To
parse a CSV file to a generic representation, just convert each
record to a value, like so:Vector ByteString
>>> decode NoHeader "John,27\r\nJane,28\r\n" :: Either String (Vector (Vector ByteString)) Right [["John","27"],["Jane","28"]]
As the example output above shows, all the fields are returned as
uninterpreted ByteString values.
Custom type conversions for fields
Most of the time the existing FromField and ToField instances
do what you want. However, if you need to parse a different format
(e.g. hex) but use a type (e.g. Int) for which there's already a
FromField instance, you need to use a newtype. Example:
newtype Hex = Hex Int
parseHex :: ByteString -> Parser Int
parseHex = ...
instance FromField Hex where
parseField s = Hex <$> parseHex sOther than giving an explicit type signature, you can pattern match
on the newtype constructor to indicate which type conversion you
want to have the library use:
case decode NoHeader "0xff,0xaa\r\n0x11,0x22\r\n" of
Left err -> putStrLn err
Right v -> forM_ v $ \ (Hex val1, Hex val2) ->
print (val1, val2)If a field might be in one several different formats, you can use a newtype to normalize the result:
newtype HexOrDecimal = HexOrDecimal Int
instance FromField DefaultToZero where
parseField s = case runParser (parseField s :: Parser Hex) of
Left err -> HexOrDecimal <$> parseField s -- Uses Int instance
Right n -> pure $ HexOrDecimal nYou can use the unit type, (), to ignore a column. The
parseField method for () doesn't look at the Field and thus
always decodes successfully. Note that it lacks a corresponding
ToField instance. Example:
case decode NoHeader "foo,1\r\nbar,22" of
Left err -> putStrLn err
Right v -> forM_ v $ \ ((), i) -> print (i :: Int)Dealing with bad data
If your input might contain invalid fields, you can write a custom
FromField instance to deal with them. Example:
newtype DefaultToZero = DefaultToZero Int
instance FromField DefaultToZero where
parseField s = case runParser (parseField s) of
Left err -> pure $ DefaultToZero 0
Right n -> pure $ DefaultToZero nEncoding and decoding
Encoding and decoding is a two step process. To encode a value, it
is first converted to a generic representation, using either
ToRecord or ToNamedRecord. The generic representation is then
encoded as CSV data. To decode a value the process is reversed and
either FromRecord or FromNamedRecord is used instead. Both
these steps are combined in the encode and decode functions.
Is the CSV data preceded by a header?
Arguments
| :: FromRecord a | |
| => HasHeader | Data contains header that should be skipped |
| -> ByteString | CSV data |
| -> Either String (Vector a) |
Efficiently deserialize CSV records from a lazy ByteString.
If this fails due to incomplete or invalid input, is
returned. Equivalent to Left msg.decodeWith defaultDecodeOptions
Arguments
| :: FromNamedRecord a | |
| => ByteString | CSV data |
| -> Either String (Header, Vector a) |
Efficiently deserialize CSV records from a lazy ByteString.
If this fails due to incomplete or invalid input, is
returned. The data is assumed to be preceeded by a header.
Equivalent to Left msg.decodeByNameWith defaultDecodeOptions
encode :: ToRecord a => [a] -> ByteString Source #
Efficiently serialize CSV records as a lazy ByteString.
encodeByName :: ToNamedRecord a => Header -> [a] -> ByteString Source #
Efficiently serialize CSV records as a lazy ByteString. The
header is written before any records and dictates the field order.
encodeDefaultOrderedByName :: (DefaultOrdered a, ToNamedRecord a) => [a] -> ByteString Source #
Like encodeByName, but header and field order is dictated by
the header method.
class DefaultOrdered a where Source #
A type that has a default field order when converted to CSV. This
class lets you specify how to get the headers to use for a record
type that's an instance of ToNamedRecord.
To derive an instance, the type is required to only have one constructor and that constructor must have named fields (also known as selectors) for all fields.
Right: data Foo = Foo { foo :: !Int }
Wrong: data Bar = Bar Int
If you try to derive an instance using GHC generics and your type doesn't have named fields, you will get an error along the lines of:
<interactive>:9:10:
No instance for (DefaultOrdered (M1 S NoSelector (K1 R Char) ()))
arising from a use of ‘Data.Csv.Conversion.$gdmheader’
In the expression: Data.Csv.Conversion.$gdmheader
In an equation for ‘header’:
header = Data.Csv.Conversion.$gdmheader
In the instance declaration for ‘DefaultOrdered Foo’Methods
headerOrder :: a -> Header Source #
The header order for this record. Should include the names
used in the NamedRecord returned by toNamedRecord. Pass
undefined as the argument, together with a type annotation
e.g. .headerOrder (undefined :: MyRecord)
headerOrder :: (Generic a, GToNamedRecordHeader (Rep a)) => a -> Header Source #
The header order for this record. Should include the names
used in the NamedRecord returned by toNamedRecord. Pass
undefined as the argument, together with a type annotation
e.g. .headerOrder (undefined :: MyRecord)
Encoding and decoding options
These functions can be used to control how data is encoded and decoded. For example, they can be used to encode data in a tab-separated format instead of in a comma-separated format.
data DecodeOptions Source #
Options that controls how data is decoded. These options can be used to e.g. decode tab-separated data instead of comma-separated data.
To avoid having your program stop compiling when new fields are
added to DecodeOptions, create option records by overriding
values in defaultDecodeOptions. Example:
myOptions = defaultDecodeOptions {
decDelimiter = fromIntegral (ord '\t')
}Constructors
| DecodeOptions | |
Fields
| |
Instances
defaultDecodeOptions :: DecodeOptions Source #
Decoding options for parsing CSV files.
Arguments
| :: FromRecord a | |
| => DecodeOptions | Decoding options |
| -> HasHeader | Data contains header that should be skipped |
| -> ByteString | CSV data |
| -> Either String (Vector a) |
Like decode, but lets you customize how the CSV data is parsed.
Arguments
| :: FromNamedRecord a | |
| => DecodeOptions | Decoding options |
| -> ByteString | CSV data |
| -> Either String (Header, Vector a) |
Like decodeByName, but lets you customize how the CSV data is
parsed.
data EncodeOptions Source #
Options that controls how data is encoded. These options can be used to e.g. encode data in a tab-separated format instead of in a comma-separated format.
To avoid having your program stop compiling when new fields are
added to EncodeOptions, create option records by overriding
values in defaultEncodeOptions. Example:
myOptions = defaultEncodeOptions {
encDelimiter = fromIntegral (ord '\t')
}N.B. The encDelimiter must not be the quote character (i.e.
") or one of the record separator characters (i.e. \n or
\r).
Constructors
| EncodeOptions | |
Fields
| |
Instances
Should quoting be applied to fields, and at which level?
Constructors
| QuoteNone | No quotes. |
| QuoteMinimal | Quotes according to RFC 4180. |
| QuoteAll | Always quote. |
defaultEncodeOptions :: EncodeOptions Source #
Encoding options for CSV files.
encodeWith :: ToRecord a => EncodeOptions -> [a] -> ByteString Source #
Like encode, but lets you customize how the CSV data is
encoded.
encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> [a] -> ByteString Source #
Like encodeByName, but lets you customize how the CSV data is
encoded.
encodeDefaultOrderedByNameWith :: forall a. (DefaultOrdered a, ToNamedRecord a) => EncodeOptions -> [a] -> ByteString Source #
Like encodeDefaultOrderedByNameWith, but lets you customize how
the CSV data is encoded.
Core CSV types
type Field = ByteString Source #
A single field within a record.
type Header = Vector Name Source #
The header corresponds to the first line a CSV file. Not all CSV files have a header.
type Name = ByteString Source #
A header has one or more names, describing the data in the column following the name.
type NamedRecord = HashMap ByteString ByteString Source #
A record corresponds to a single line in a CSV file, indexed by the column name rather than the column index.
Type conversion
There are two ways to convert CSV records to and from and user-defined data types: index-based conversion and name-based conversion.
Index-based record conversion
Index-based conversion lets you convert CSV records to and from user-defined data types by referring to a field's position (its index) in the record. The first column in a CSV file is given index 0, the second index 1, and so on.
class FromRecord a where Source #
A type that can be converted from a single CSV record, with the possibility of failure.
When writing an instance, use empty, mzero, or fail to make a
conversion fail, e.g. if a Record has the wrong number of
columns.
Given this example data:
John,56 Jane,55
here's an example type and instance:
data Person = Person { name :: !Text, age :: !Int }
instance FromRecord Person where
parseRecord v
| length v == 2 = Person <$>
v .! 0 <*>
v .! 1
| otherwise = mzeroMethods
parseRecord :: Record -> Parser a Source #
parseRecord :: (Generic a, GFromRecord (Rep a)) => Record -> Parser a Source #
Instances
Conversion of a field to a value might fail e.g. if the field is
malformed. This possibility is captured by the Parser type, which
lets you compose several field conversions together in such a way
that if any of them fail, the whole record conversion fails.
index :: FromField a => Record -> Int -> Parser a Source #
Retrieve the nth field in the given record. The result is
empty if the value cannot be converted to the desired type.
Raises an exception if the index is out of bounds.
index is a simple convenience function that is equivalent to
. If you're certain that the index is not
out of bounds, using parseField (v ! idx)unsafeIndex is somewhat faster.
unsafeIndex :: FromField a => Record -> Int -> Parser a Source #
Like index but without bounds checking.
class ToRecord a where Source #
A type that can be converted to a single CSV record.
An example type and instance:
data Person = Person { name :: !Text, age :: !Int }
instance ToRecord Person where
toRecord (Person name age) = record [
toField name, toField age]Outputs data on this form:
John,56 Jane,55
Methods
toRecord :: a -> Record Source #
Convert a value to a record.
toRecord :: (Generic a, GToRecord (Rep a) Field) => a -> Record Source #
Convert a value to a record.
Instances
record :: [ByteString] -> Record Source #
Construct a record from a list of ByteStrings. Use toField
to convert values to ByteStrings for use with record.
The 1-tuple type or single-value "collection".
This type is structurally equivalent to the
Identity type, but its intent is more
about serving as the anonymous 1-tuple type missing from Haskell for attaching
typeclass instances.
Parameter usage example:
encodeSomething (Only (42::Int))Result usage example:
xs <- decodeSomething
forM_ xs $ \(Only id) -> {- ... -}Name-based record conversion
Name-based conversion lets you convert CSV records to and from user-defined data types by referring to a field's name. The names of the fields are defined by the first line in the file, also known as the header. Name-based conversion is more robust to changes in the file structure e.g. to reording or addition of columns, but can be a bit slower.
class FromNamedRecord a where Source #
A type that can be converted from a single CSV record, with the possibility of failure.
When writing an instance, use empty, mzero, or fail to make a
conversion fail, e.g. if a Record has the wrong number of
columns.
Given this example data:
name,age John,56 Jane,55
here's an example type and instance:
{-# LANGUAGE OverloadedStrings #-}
data Person = Person { name :: !Text, age :: !Int }
instance FromNamedRecord Person where
parseNamedRecord m = Person <$>
m .: "name" <*>
m .: "age"Note the use of the OverloadedStrings language extension which
enables ByteString values to be written as string literals.
Methods
parseNamedRecord :: NamedRecord -> Parser a Source #
parseNamedRecord :: (Generic a, GFromNamedRecord (Rep a)) => NamedRecord -> Parser a Source #
lookup :: FromField a => NamedRecord -> ByteString -> Parser a Source #
Retrieve a field in the given record by name. The result is
empty if the field is missing or if the value cannot be converted
to the desired type.
(.:) :: FromField a => NamedRecord -> ByteString -> Parser a Source #
Alias for lookup.
class ToNamedRecord a where Source #
A type that can be converted to a single CSV record.
An example type and instance:
data Person = Person { name :: !Text, age :: !Int }
instance ToNamedRecord Person where
toNamedRecord (Person name age) = namedRecord [
"name" .= name, "age" .= age]Methods
toNamedRecord :: a -> NamedRecord Source #
Convert a value to a named record.
toNamedRecord :: (Generic a, GToRecord (Rep a) (ByteString, ByteString)) => a -> NamedRecord Source #
Convert a value to a named record.
namedRecord :: [(ByteString, ByteString)] -> NamedRecord Source #
Construct a named record from a list of name-value ByteString
pairs. Use .= to construct such a pair from a name and a value.
namedField :: ToField a => ByteString -> a -> (ByteString, ByteString) Source #
Construct a pair from a name and a value. For use with
namedRecord.
(.=) :: ToField a => ByteString -> a -> (ByteString, ByteString) Source #
Alias for namedField.
header :: [ByteString] -> Header Source #
Construct a header from a list of ByteStrings.
Field conversion
The FromField and ToField classes define how to convert between
Fields and values you care about (e.g. Ints). Most of the time
you don't need to write your own instances as the standard ones
cover most use cases.
class FromField a where Source #
A type that can be converted from a single CSV field, with the possibility of failure.
When writing an instance, use empty, mzero, or fail to make a
conversion fail, e.g. if a Field can't be converted to the given
type.
Example type and instance:
{-# LANGUAGE OverloadedStrings #-}
data Color = Red | Green | Blue
instance FromField Color where
parseField s
| s == "R" = pure Red
| s == "G" = pure Green
| s == "B" = pure Blue
| otherwise = mzeroMinimal complete definition
Methods
parseField :: Field -> Parser a Source #
Instances
| FromField Char Source # | Assumes UTF-8 encoding. |
| FromField Double Source # | Accepts same syntax as |
| FromField Float Source # | Accepts same syntax as |
| FromField Int Source # | Accepts a signed decimal number. Ignores whitespace. |
| FromField Int8 Source # | Accepts a signed decimal number. Ignores whitespace. |
| FromField Int16 Source # | Accepts a signed decimal number. Ignores whitespace. |
| FromField Int32 Source # | Accepts a signed decimal number. Ignores whitespace. |
| FromField Int64 Source # | Accepts a signed decimal number. Ignores whitespace. |
| FromField Integer Source # | Accepts a signed decimal number. Ignores whitespace. |
| FromField Word Source # | Accepts an unsigned decimal number. Ignores whitespace. |
| FromField Word8 Source # | Accepts an unsigned decimal number. Ignores whitespace. |
| FromField Word16 Source # | Accepts an unsigned decimal number. Ignores whitespace. |
| FromField Word32 Source # | Accepts an unsigned decimal number. Ignores whitespace. |
| FromField Word64 Source # | Accepts an unsigned decimal number. Ignores whitespace. |
| FromField () Source # | Ignores the |
| FromField ByteString Source # | |
| FromField ByteString Source # | |
| FromField Text Source # | Assumes UTF-8 encoding. Fails on invalid byte sequences. |
| FromField Text Source # | Assumes UTF-8 encoding. Fails on invalid byte sequences. |
| FromField ShortByteString Source # | |
| FromField ShortText Source # | |
| FromField [Char] Source # | Assumes UTF-8 encoding. Fails on invalid byte sequences. |
| FromField a => FromField (Maybe a) Source # | |
| FromField a => FromField (Either Field a) Source # | |
class ToField a where Source #
A type that can be converted to a single CSV field.
Example type and instance:
{-# LANGUAGE OverloadedStrings #-}
data Color = Red | Green | Blue
instance ToField Color where
toField Red = "R"
toField Green = "G"
toField Blue = "B"Minimal complete definition
Instances
| ToField Char Source # | Uses UTF-8 encoding. |
| ToField Double Source # | Uses decimal notation or scientific notation, depending on the number. |
| ToField Float Source # | Uses decimal notation or scientific notation, depending on the number. |
| ToField Int Source # | Uses decimal encoding with optional sign. |
| ToField Int8 Source # | Uses decimal encoding with optional sign. |
| ToField Int16 Source # | Uses decimal encoding with optional sign. |
| ToField Int32 Source # | Uses decimal encoding with optional sign. |
| ToField Int64 Source # | Uses decimal encoding with optional sign. |
| ToField Integer Source # | Uses decimal encoding with optional sign. |
| ToField Word Source # | Uses decimal encoding. |
| ToField Word8 Source # | Uses decimal encoding. |
| ToField Word16 Source # | Uses decimal encoding. |
| ToField Word32 Source # | Uses decimal encoding. |
| ToField Word64 Source # | Uses decimal encoding. |
| ToField ByteString Source # | |
| ToField ByteString Source # | |
| ToField Text Source # | Uses UTF-8 encoding. |
| ToField Text Source # | Uses UTF-8 encoding. |
| ToField ShortByteString Source # | |
| ToField ShortText Source # | |
| ToField [Char] Source # | Uses UTF-8 encoding. |
| ToField a => ToField (Maybe a) Source # | |