-- | .2bit format (from the UCSC Genome Browser FAQ)
--
-- A .2bit file stores multiple DNA sequences (up to 4 Gb total) in a
-- compact randomly-accessible format.  The file contains masking
-- information as well as the DNA itself.
--
-- The file begins with a 16-byte header containing the following fields:
--
-- - signature - the number 0x1A412743 in the architecture of the machine that created the file
-- - version - zero for now. Readers should abort if they see a version number higher than 0
-- - sequenceCount - the number of sequences in the file
-- - reserved - always zero for now
--
-- All fields are 32 bits unless noted. If the signature value is not as
-- given, the reader program should byte-swap the signature and check if
-- the swapped version matches. If so, all multiple-byte entities in the
-- file will have to be byte-swapped. This enables these binary files to
-- be used unchanged on different architectures.
--
-- The header is followed by a file index, which contains one entry for
-- each sequence. Each index entry contains three fields:
--
-- - nameSize - a byte containing the length of the name field
-- - name - the sequence name itself (in ASCII-compatible byte string), of variable length depending on nameSize
-- - offset - the 32-bit offset of the sequence data relative to the start of the file, not aligned to any 4-byte padding boundary
--
-- The index is followed by the sequence records, which contain nine fields:
--
-- - dnaSize - number of bases of DNA in the sequence
-- - nBlockCount - the number of blocks of Ns in the file (representing unknown sequence)
-- - nBlockStarts - an array of length nBlockCount of 32 bit integers indicating the (0-based) starting position of a block of Ns
-- - nBlockSizes - an array of length nBlockCount of 32 bit integers indicating the length of a block of Ns
-- - maskBlockCount - the number of masked (lower-case) blocks
-- - maskBlockStarts - an array of length maskBlockCount of 32 bit integers indicating the (0-based) starting position of a masked block
-- - maskBlockSizes - an array of length maskBlockCount of 32 bit integers indicating the length of a masked block
-- - reserved - always zero for now
-- - packedDna - the DNA packed to two bits per base, represented as so:
--     T - 00, C - 01, A - 10, G - 11. The first base is in the most
--     significant 2-bit byte; the last base is in the least significant
--     2 bits. For example, the sequence TCAG is represented as 00011011.
--
-- In this format, it is neither possible nor necessary to store Ns in
-- the main sequence, and one wouldn't expect them to take up space
-- there.  However, they do; hard masked sequence is typically stored as
-- many Ts.  The sensible way to treat these is probably to just say
-- there are two kinds of implied annotation (repeats and large gaps for
-- a typical genome), which can be interpreted in whatever way fits.

module Bio.TwoBit (
        TwoBitFile(..),
        openTwoBit,

        TwoBitChromosome(..),
        tbf_chrnames,
        findChrom,

        TwoBitSequence'(..),
        TwoBitSequence,
        Unidrectional,
        Bidirectional,
        unpackRSRaw,
        unpackRS,
        unpackRSMasked,

        Masking(..),
        isSoftMasked,
        isHardMasked,
        noneMasked,
        softMasked,
        hardMasked,
        bothMasked
    ) where

import           Control.Applicative
import           Control.Exception                    ( Exception(..), throw )
import           Control.Monad                        ( guard )
import           Control.Monad.Primitive              ( unsafeInlineIO )
import           Data.Bits
import           Data.Char                            ( toLower )
import qualified Data.ByteString.Char8          as B
import qualified Data.ByteString.Internal       as B ( fromForeignPtr )
import           Data.Foldable
import qualified Data.HashMap.Strict            as M
import           Data.List                           ( mapAccumL )
import           Data.Primitive.Array                ( Array, arrayFromList )
import           Data.Primitive.PrimArray            ( indexPrimArray )
import           Data.Word                           ( byteSwap32, Word8, Word32 )
import           Foreign.ForeignPtr                  ( ForeignPtr, withForeignPtr )
import           Foreign.Ptr                         ( castPtr, plusPtr, Ptr )
import           Foreign.Storable                    ( Storable(..) )
import           GHC.Base                            ( build )
import           System.IO.MMap                      ( mmapFileForeignPtr, Mode(..) )

data TwoBitFile = TBF { TwoBitFile -> ForeignPtr Word8
tbf_raw    :: {-# UNPACK #-} !(ForeignPtr Word8)
                      , TwoBitFile -> Int
tbf_size   :: {-# UNPACK #-} !Int
                      , TwoBitFile -> ByteString
tbf_path   :: {-# UNPACK #-} !B.ByteString
                      , TwoBitFile -> Array TwoBitChromosome
tbf_chroms :: {-# UNPACK #-} !(Array TwoBitChromosome)
                      , TwoBitFile -> HashMap ByteString TwoBitChromosome
tbf_chrmap ::                !(M.HashMap B.ByteString TwoBitChromosome) }

tbf_chrnames :: TwoBitFile -> [B.ByteString]
tbf_chrnames :: TwoBitFile -> [ByteString]
tbf_chrnames = Array ByteString -> [ByteString]
forall a. Array a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList (Array ByteString -> [ByteString])
-> (TwoBitFile -> Array ByteString) -> TwoBitFile -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (TwoBitChromosome -> ByteString)
-> Array TwoBitChromosome -> Array ByteString
forall a b. (a -> b) -> Array a -> Array b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap TwoBitChromosome -> ByteString
tbc_name (Array TwoBitChromosome -> Array ByteString)
-> (TwoBitFile -> Array TwoBitChromosome)
-> TwoBitFile
-> Array ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TwoBitFile -> Array TwoBitChromosome
tbf_chroms

-- | Finds a named scaffold in the reference.  If it doesn't find the
-- exact name, it will try to compensate for the crazy naming
-- differences between NCBI and UCSC.  This doesn't work in general, but
-- is good enough in the common case.  In particular, "1" maps to "chr1"
-- and back, "GL000192.1" to "chr1_gl000192_random" and back, and "chrM"
-- to "MT" and back.
findChrom :: B.ByteString -> TwoBitFile -> Maybe TwoBitChromosome
findChrom :: ByteString -> TwoBitFile -> Maybe TwoBitChromosome
findChrom ByteString
c TBF{ tbf_chrmap :: TwoBitFile -> HashMap ByteString TwoBitChromosome
tbf_chrmap = HashMap ByteString TwoBitChromosome
cs } =
          ByteString
-> HashMap ByteString TwoBitChromosome -> Maybe TwoBitChromosome
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup ByteString
c HashMap ByteString TwoBitChromosome
cs
    Maybe TwoBitChromosome
-> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a. Maybe a -> Maybe a -> Maybe a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>   ByteString
-> HashMap ByteString TwoBitChromosome -> Maybe TwoBitChromosome
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (ByteString
"chr" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
c) HashMap ByteString TwoBitChromosome
cs
    Maybe TwoBitChromosome
-> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a. Maybe a -> Maybe a -> Maybe a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (ByteString
"chr" ByteString -> ByteString -> Bool
`B.isPrefixOf` ByteString
c) Maybe () -> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a b. Maybe a -> Maybe b -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString
-> HashMap ByteString TwoBitChromosome -> Maybe TwoBitChromosome
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (Int -> ByteString -> ByteString
B.drop Int
3 ByteString
c) HashMap ByteString TwoBitChromosome
cs )
    Maybe TwoBitChromosome
-> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a. Maybe a -> Maybe a -> Maybe a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (ByteString
"chrM" ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
c) Maybe () -> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a b. Maybe a -> Maybe b -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString
-> HashMap ByteString TwoBitChromosome -> Maybe TwoBitChromosome
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup ByteString
"MT" HashMap ByteString TwoBitChromosome
cs )
    Maybe TwoBitChromosome
-> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a. Maybe a -> Maybe a -> Maybe a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (ByteString
"MT" ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
c) Maybe () -> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a b. Maybe a -> Maybe b -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString
-> HashMap ByteString TwoBitChromosome -> Maybe TwoBitChromosome
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup ByteString
"chrM" HashMap ByteString TwoBitChromosome
cs )
    Maybe TwoBitChromosome
-> Maybe TwoBitChromosome -> Maybe TwoBitChromosome
forall a. Maybe a -> Maybe a -> Maybe a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ( case (TwoBitChromosome -> Bool)
-> [TwoBitChromosome] -> [TwoBitChromosome]
forall a. (a -> Bool) -> [a] -> [a]
filter (\TwoBitChromosome
d -> ByteString -> ByteString -> Bool
match ByteString
c (TwoBitChromosome -> ByteString
tbc_name TwoBitChromosome
d) Bool -> Bool -> Bool
|| ByteString -> ByteString -> Bool
match (TwoBitChromosome -> ByteString
tbc_name TwoBitChromosome
d) ByteString
c) ([TwoBitChromosome] -> [TwoBitChromosome])
-> [TwoBitChromosome] -> [TwoBitChromosome]
forall a b. (a -> b) -> a -> b
$ HashMap ByteString TwoBitChromosome -> [TwoBitChromosome]
forall k v. HashMap k v -> [v]
M.elems HashMap ByteString TwoBitChromosome
cs of
                [Item [TwoBitChromosome]
x] -> TwoBitChromosome -> Maybe TwoBitChromosome
forall a. a -> Maybe a
Just Item [TwoBitChromosome]
TwoBitChromosome
x ; [TwoBitChromosome]
_ -> Maybe TwoBitChromosome
forall a. Maybe a
Nothing )
  where
    match :: ByteString -> ByteString -> Bool
match ByteString
x ByteString
y = ByteString -> ByteString -> Bool
B.isInfixOf ((Char -> Char) -> ByteString -> ByteString
B.map Char -> Char
toLower ((Char -> Bool) -> ByteString -> ByteString
B.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'.') ByteString
x)) ByteString
y

data TwoBitChromosome = TBC { TwoBitChromosome -> ForeignPtr Word8
tbc_raw        :: {-# UNPACK #-} !(ForeignPtr Word8)
                            , TwoBitChromosome -> ByteString
tbc_name       :: {-# UNPACK #-} !B.ByteString
                            , TwoBitChromosome -> Int
tbc_index      :: {-# UNPACK #-} !Int
                            , TwoBitChromosome -> Word32
tbc_dna_offset :: {-# UNPACK #-} !Word32
                            , TwoBitChromosome -> Word32
tbc_dna_size   :: {-# UNPACK #-} !Word32
                            -- | Lazily generated sequence in forward direction; the argument is the offset of the first base.
                            , TwoBitChromosome -> Int -> TwoBitSequence' Unidrectional
tbc_fwd_seq    :: Int -> TwoBitSequence' Unidrectional
                            -- | Lazily generated sequence in reverse direction; the argument is the offset of the first base to the
                            -- right of the beginning.  (The first base generated is the complement of the base found at (offset-1).
                            , TwoBitChromosome -> Int -> TwoBitSequence' Bidirectional
tbc_rev_seq    :: Int -> TwoBitSequence' Bidirectional }


data TwoBitError = WrongSignature FilePath
                 | UnsortedBlocks FilePath
                 | OutOfBounds FilePath Word32 Int
                 | OverlongSequence FilePath Word32 Word32 Int
  deriving Int -> TwoBitError -> ShowS
[TwoBitError] -> ShowS
TwoBitError -> String
(Int -> TwoBitError -> ShowS)
-> (TwoBitError -> String)
-> ([TwoBitError] -> ShowS)
-> Show TwoBitError
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> TwoBitError -> ShowS
showsPrec :: Int -> TwoBitError -> ShowS
$cshow :: TwoBitError -> String
show :: TwoBitError -> String
$cshowList :: [TwoBitError] -> ShowS
showList :: [TwoBitError] -> ShowS
Show

instance Exception TwoBitError where
    displayException :: TwoBitError -> String
displayException (WrongSignature String
fp) = String
"The file " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
fp String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"does not have a .2bit signature."
    displayException (UnsortedBlocks String
fp) = String
"The N and mask blocks in file " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
fp String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" are not sorted."
    displayException (OutOfBounds String
fp Word32
o Int
s) = String
"Attempted to access offset " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> String
forall a. Show a => a -> String
show Word32
o String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" in file " String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
fp String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" of size " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"."
    displayException (OverlongSequence String
fp Word32
o Word32
l Int
s) = String
"A sequence of length " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> String
forall a. Show a => a -> String
show Word32
l String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" starting at " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> String
forall a. Show a => a -> String
show Word32
o String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" in file "
                                                   String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
forall a. Show a => a -> String
show String
fp String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" hangs over its end at " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"."


-- | Brings a 2bit file into memory.  The file is mmap'ed, so it will
-- not work on streams that are not actual files.  It's also unsafe if
-- the file is concurrently modified in any way.
openTwoBit :: FilePath -> IO TwoBitFile
openTwoBit :: String -> IO TwoBitFile
openTwoBit String
fp = do (ForeignPtr Word8
p,Int
o,Int
l) <- String
-> Mode -> Maybe (Int64, Int) -> IO (ForeignPtr Word8, Int, Int)
forall a.
String -> Mode -> Maybe (Int64, Int) -> IO (ForeignPtr a, Int, Int)
mmapFileForeignPtr String
fp Mode
ReadOnly Maybe (Int64, Int)
forall a. Maybe a
Nothing
                   if Int
o Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then TwoBitFile -> IO TwoBitFile
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TwoBitFile -> IO TwoBitFile) -> TwoBitFile -> IO TwoBitFile
forall a b. (a -> b) -> a -> b
$ String -> ForeignPtr Word8 -> Int -> TwoBitFile
parseTwoBit String
fp ForeignPtr Word8
p Int
l
                             else String -> IO TwoBitFile
forall a. String -> IO a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> IO TwoBitFile) -> String -> IO TwoBitFile
forall a b. (a -> b) -> a -> b
$ String
"unexpected: mmapFileForeignPtr returned an offset"


-- | Parses a 2bit file.  The @FilePath@ argument is only used in error
-- messages, what is really parsed is the memory block, typically from
-- mmapping the file.
--
-- The workhorse in here is the construction of the 'tbc_fwd_seq' and
-- 'tbc_rev_seq' functions.  When called, they first run a binary search
-- on the mask lists, then produce a list of blocks with uniform
-- masking.  Both parts of the algorithm are fast and directly use the
-- on-disk data structures.
--
-- In theory, there could be 2bit files in big endian format out there.
-- We nominally support them, but since I've never seen one in the wild,
-- this may well fail in a spectacular way.

parseTwoBit :: FilePath -> ForeignPtr Word8 -> Int -> TwoBitFile
parseTwoBit :: String -> ForeignPtr Word8 -> Int -> TwoBitFile
parseTwoBit String
fp0 ForeignPtr Word8
raw Int
size
   | (Ptr Any -> IO Word32) -> Word32 -> Word32
forall {b} {a}. (Ptr b -> IO a) -> Word32 -> a
getW32_ Ptr Any -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32     Word32
0 Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0x1A412743 Bool -> Bool -> Bool
&& (Ptr Any -> IO Word32) -> Word32 -> Word32
forall {b} {a}. (Ptr b -> IO a) -> Word32 -> a
getW32_ Ptr Any -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32     Word32
4 Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0  =  [TwoBitChromosome] -> TwoBitFile
kont ([TwoBitChromosome] -> TwoBitFile)
-> [TwoBitChromosome] -> TwoBitFile
forall a b. (a -> b) -> a -> b
$ (Word32 -> Word32) -> [TwoBitChromosome]
parseEachSeq ((Ptr Any -> IO Word32) -> Word32 -> Word32
forall {b} {a}. (Ptr b -> IO a) -> Word32 -> a
getW32_ Ptr Any -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32)
   | (Ptr Any -> IO Word32) -> Word32 -> Word32
forall {b} {a}. (Ptr b -> IO a) -> Word32 -> a
getW32_ Ptr Any -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32Swap Word32
0 Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0x1A412743 Bool -> Bool -> Bool
&& (Ptr Any -> IO Word32) -> Word32 -> Word32
forall {b} {a}. (Ptr b -> IO a) -> Word32 -> a
getW32_ Ptr Any -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32Swap Word32
4 Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0  =  [TwoBitChromosome] -> TwoBitFile
kont ([TwoBitChromosome] -> TwoBitFile)
-> [TwoBitChromosome] -> TwoBitFile
forall a b. (a -> b) -> a -> b
$ (Word32 -> Word32) -> [TwoBitChromosome]
parseEachSeq ((Ptr Any -> IO Word32) -> Word32 -> Word32
forall {b} {a}. (Ptr b -> IO a) -> Word32 -> a
getW32_ Ptr Any -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32Swap)
   | Bool
otherwise                                                                          =  TwoBitError -> TwoBitFile
forall a e. Exception e => e -> a
throw (TwoBitError -> TwoBitFile) -> TwoBitError -> TwoBitFile
forall a b. (a -> b) -> a -> b
$ String -> TwoBitError
WrongSignature String
fp0
  where
    kont :: [TwoBitChromosome] -> TwoBitFile
kont [TwoBitChromosome]
sqs = ForeignPtr Word8
-> Int
-> ByteString
-> Array TwoBitChromosome
-> HashMap ByteString TwoBitChromosome
-> TwoBitFile
TBF ForeignPtr Word8
raw Int
size (String -> ByteString
B.pack String
fp0) ([TwoBitChromosome] -> Array TwoBitChromosome
forall a. [a] -> Array a
arrayFromList [TwoBitChromosome]
sqs) ([(ByteString, TwoBitChromosome)]
-> HashMap ByteString TwoBitChromosome
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList ([(ByteString, TwoBitChromosome)]
 -> HashMap ByteString TwoBitChromosome)
-> [(ByteString, TwoBitChromosome)]
-> HashMap ByteString TwoBitChromosome
forall a b. (a -> b) -> a -> b
$ (TwoBitChromosome -> (ByteString, TwoBitChromosome))
-> [TwoBitChromosome] -> [(ByteString, TwoBitChromosome)]
forall a b. (a -> b) -> [a] -> [b]
map ((ByteString -> TwoBitChromosome -> (ByteString, TwoBitChromosome))
-> (TwoBitChromosome -> ByteString)
-> (TwoBitChromosome -> TwoBitChromosome)
-> TwoBitChromosome
-> (ByteString, TwoBitChromosome)
forall a b c.
(a -> b -> c)
-> (TwoBitChromosome -> a)
-> (TwoBitChromosome -> b)
-> TwoBitChromosome
-> c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 (,) TwoBitChromosome -> ByteString
tbc_name TwoBitChromosome -> TwoBitChromosome
forall a. a -> a
id) [TwoBitChromosome]
sqs)

    getW32_ :: (Ptr b -> IO a) -> Word32 -> a
getW32_ Ptr b -> IO a
f Word32
o | Word32
o Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
4 Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
size  =  TwoBitError -> a
forall a e. Exception e => e -> a
throw (TwoBitError -> a) -> TwoBitError -> a
forall a b. (a -> b) -> a -> b
$ String -> Word32 -> Int -> TwoBitError
OutOfBounds String
fp0 Word32
o Int
size
                | Bool
otherwise                   =  IO a -> a
forall a. IO a -> a
unsafeInlineIO (IO a -> a) -> IO a -> a
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> (Ptr Word8 -> IO a) -> IO a
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
raw ((Ptr Word8 -> IO a) -> IO a) -> (Ptr Word8 -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> Ptr b -> IO a
f (Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
p (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
o))

    parseEachSeq :: (Word32 -> Word32) -> [TwoBitChromosome]
    parseEachSeq :: (Word32 -> Word32) -> [TwoBitChromosome]
parseEachSeq Word32 -> Word32
getW32 = (Int, [TwoBitChromosome]) -> [TwoBitChromosome]
forall a b. (a, b) -> b
snd ((Int, [TwoBitChromosome]) -> [TwoBitChromosome])
-> (Int, [TwoBitChromosome]) -> [TwoBitChromosome]
forall a b. (a -> b) -> a -> b
$ (Int -> Int -> (Int, TwoBitChromosome))
-> Int -> [Int] -> (Int, [TwoBitChromosome])
forall (t :: * -> *) s a b.
Traversable t =>
(s -> a -> (s, b)) -> s -> t a -> (s, t b)
mapAccumL ((Word32 -> Word32) -> Int -> Int -> (Int, TwoBitChromosome)
parseOneSeq Word32 -> Word32
getW32) Int
16 [Int
Item [Int]
0 .. Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Word32
getW32 Word32
8) Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]

    parseOneSeq :: (Word32 -> Word32) -> Int -> Int -> (Int, TwoBitChromosome)
parseOneSeq Word32 -> Word32
getW32 Int
off Int
nseq =
        if Word32
packedDnaOff Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
shiftR (Word32
dnasizeWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
3) Int
2 Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
size
        then TwoBitError -> (Int, TwoBitChromosome)
forall a e. Exception e => e -> a
throw (TwoBitError -> (Int, TwoBitChromosome))
-> TwoBitError -> (Int, TwoBitChromosome)
forall a b. (a -> b) -> a -> b
$ String -> Word32 -> Word32 -> Int -> TwoBitError
OverlongSequence String
fp0 Word32
packedDnaOff Word32
dnasize Int
size
        else (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
5 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nmsize, ForeignPtr Word8
-> ByteString
-> Int
-> Word32
-> Word32
-> (Int -> TwoBitSequence' Unidrectional)
-> (Int -> TwoBitSequence' Bidirectional)
-> TwoBitChromosome
TBC ForeignPtr Word8
raw ByteString
name Int
nseq Word32
packedDnaOff Word32
dnasize Int -> TwoBitSequence' Unidrectional
unfoldSeqFwd Int -> TwoBitSequence' Bidirectional
unfoldSeqRev)
      where
        !nmsize :: Int
nmsize  = IO Int -> Int
forall a. IO a -> a
unsafeInlineIO (IO Int -> Int) -> IO Int -> Int
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> (Ptr Word8 -> IO Int) -> IO Int
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
raw ((Ptr Word8 -> IO Int) -> IO Int)
-> (Ptr Word8 -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> IO Word8 -> IO Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> Int -> IO Word8
forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff Ptr Word8
p Int
off
        !name :: ByteString
name    = ForeignPtr Word8 -> Int -> Int -> ByteString
B.fromForeignPtr ForeignPtr Word8
raw (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
nmsize
        !offset :: Word32
offset  = Word32 -> Word32
getW32 (Word32 -> Word32) -> (Int -> Word32) -> Int -> Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> Int -> Word32
forall a b. (a -> b) -> a -> b
$ Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
nmsize

        !dnasize :: Word32
dnasize      = Word32 -> Word32
getW32 (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32
offset
        !nBlockCount :: Word32
nBlockCount  = Word32 -> Word32
getW32 (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32
offset Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
4
        !mBlockCount :: Word32
mBlockCount  = Word32 -> Word32
getW32 (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32
offset Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
8 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
8Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
*Word32
nBlockCount
        !packedDnaOff :: Word32
packedDnaOff = Word32
offset Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
16 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
8 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* (Word32
nBlockCountWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
mBlockCount)

        -- Valid blocks are numbered 1..max; there are virtual guard blocks at indices 0 and (max+1), which make the later
        -- algorithms much cleaner
        n_block, m_block :: Word32 -> Block
        n_block :: Word32 -> Block
n_block Word32
i | Word32
i Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0           =  Word32 -> Word32 -> Word32 -> Block
B Word32
0 Word32
0 Word32
i
                  | Word32
i Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
nBlockCount  =  Word32 -> Word32 -> Word32 -> Block
B Word32
forall a. Bounded a => a
maxBound Word32
forall a. Bounded a => a
maxBound Word32
i
                  | Bool
otherwise        =  Word32 -> Word32 -> Word32 -> Block
B Word32
a (Word32
aWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
b) Word32
i
          where
            !a :: Word32
a = Word32 -> Word32
getW32 (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32
offsetWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
4 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
4Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
*Word32
i
            !b :: Word32
b = Word32 -> Word32
getW32 (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32
offsetWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
4 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
4Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
*(Word32
iWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
nBlockCount)

        m_block :: Word32 -> Block
m_block Word32
i | Word32
i Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0           =  Word32 -> Word32 -> Word32 -> Block
B Word32
0 Word32
0 Word32
i
                  | Word32
i Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
mBlockCount  =  Word32 -> Word32 -> Word32 -> Block
B Word32
forall a. Bounded a => a
maxBound Word32
forall a. Bounded a => a
maxBound Word32
i
                  | Bool
otherwise        =  Word32 -> Word32 -> Word32 -> Block
B Word32
a (Word32
aWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
b) Word32
i
          where
            !a :: Word32
a = Word32 -> Word32
getW32 (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32
offsetWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
8 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
8Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
*Word32
nBlockCount Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
4Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
*Word32
i
            !b :: Word32
b = Word32 -> Word32
getW32 (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32
offsetWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
8 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
8Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
*Word32
nBlockCount Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
4Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
*(Word32
iWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
mBlockCount)


        unfoldSeqFwd :: Int -> TwoBitSequence' Unidrectional
        unfoldSeqFwd :: Int -> TwoBitSequence' Unidrectional
unfoldSeqFwd Int
chroff = Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Unidrectional
unfoldSeqFwd' ((Word32 -> Block) -> Word32 -> Block
forall {t}. Integral t => (t -> Block) -> t -> Block
search Word32 -> Block
n_block Word32
nBlockCount) ((Word32 -> Block) -> Word32 -> Block
forall {t}. Integral t => (t -> Block) -> t -> Block
search Word32 -> Block
m_block Word32
mBlockCount)
                                            (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
chroff) (Word32
packedDnaOff Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
4 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
chroff)
          where
            trim :: Block -> Block
trim Block
b = Block
b { start_offset = max (fromIntegral chroff) (start_offset b) }

            -- finds the smallest index such that the block end(!) is larger than 'chroff'
            search :: (t -> Block) -> t -> Block
search t -> Block
f t
num  =  Block -> Block
trim (Block -> Block) -> (t -> Block) -> t -> Block
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t -> Block
f (t -> Block) -> t -> Block
forall a b. (a -> b) -> a -> b
$ t -> t -> t
go t
0 (t
numt -> t -> t
forall a. Num a => a -> a -> a
+t
1)
              where
                go :: t -> t -> t
go t
a t
b | t
a t -> t -> Bool
forall a. Eq a => a -> a -> Bool
== t
b                                  =  t
a
                       | Block -> Word32
end_offset (t -> Block
f t
m) Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
chroff  =  t -> t -> t
go t
a t
m
                       | Bool
otherwise                               =  t -> t -> t
go (t
mt -> t -> t
forall a. Num a => a -> a -> a
+t
1) t
b
                  where
                    m :: t
m = t -> t -> t
forall a. Integral a => a -> a -> a
div (t
a t -> t -> t
forall a. Num a => a -> a -> a
+ t
b) t
2

        unfoldSeqFwd' :: Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Unidrectional
        unfoldSeqFwd' :: Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Unidrectional
unfoldSeqFwd' nb :: Block
nb@(B Word32
nstart Word32
nend Word32
_) mb :: Block
mb@(B Word32
mstart Word32
mend Word32
_) !Word32
chroff !Word32
fileoff
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word32
dnasize                   =  TwoBitSequence' Unidrectional
forall dir. TwoBitSequence' dir
RefEnd
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
mstart Bool -> Bool -> Bool
|| Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
nstart  =  TwoBitError -> TwoBitSequence' Unidrectional
forall a e. Exception e => e -> a
throw (String -> TwoBitError
UnsortedBlocks String
fp0)
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
nstart Bool -> Bool -> Bool
&& Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
mstart  =  Masking -> Word32 -> TwoBitSequence' Unidrectional
advance Masking
noneMasked (Word32 -> TwoBitSequence' Unidrectional)
-> Word32 -> TwoBitSequence' Unidrectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
dnasize (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
nstart Word32
mstart
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
mstart                     =  Masking -> Word32 -> TwoBitSequence' Unidrectional
advance Masking
hardMasked (Word32 -> TwoBitSequence' Unidrectional)
-> Word32 -> TwoBitSequence' Unidrectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
dnasize (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
nend Word32
mstart
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
nstart                     =  Masking -> Word32 -> TwoBitSequence' Unidrectional
advance Masking
softMasked (Word32 -> TwoBitSequence' Unidrectional)
-> Word32 -> TwoBitSequence' Unidrectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
dnasize (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
mend Word32
nstart
            | Bool
otherwise                           =  Masking -> Word32 -> TwoBitSequence' Unidrectional
advance Masking
bothMasked (Word32 -> TwoBitSequence' Unidrectional)
-> Word32 -> TwoBitSequence' Unidrectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
dnasize (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
min Word32
nend Word32
mend
          where
            advance :: Masking -> Word32 -> TwoBitSequence' Unidrectional
advance Masking
m Word32
x = Masking
-> ForeignPtr Word8
-> Word
-> Int
-> TwoBitSequence' Unidrectional
-> TwoBitSequence' Unidrectional
forall dir.
Masking
-> ForeignPtr Word8
-> Word
-> Int
-> TwoBitSequence' dir
-> TwoBitSequence' dir
SomeSeq Masking
m ForeignPtr Word8
raw (Word32 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
fileoff) (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Word32 -> Int
forall a b. (a -> b) -> a -> b
$ Word32
x Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
- Word32
chroff) (TwoBitSequence' Unidrectional -> TwoBitSequence' Unidrectional)
-> TwoBitSequence' Unidrectional -> TwoBitSequence' Unidrectional
forall a b. (a -> b) -> a -> b
$
                          Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Unidrectional
unfoldSeqFwd' (Word32 -> (Word32 -> Block) -> Block -> Block
advanceB Word32
x Word32 -> Block
n_block Block
nb) (Word32 -> (Word32 -> Block) -> Block -> Block
advanceB Word32
x Word32 -> Block
m_block Block
mb) Word32
x (Word32
fileoffWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
xWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
-Word32
chroff)

            advanceB :: Word32 -> (Word32 -> Block) -> Block -> Block
            advanceB :: Word32 -> (Word32 -> Block) -> Block -> Block
advanceB Word32
x Word32 -> Block
f (B Word32
start Word32
end Word32
i)
                | Word32
x Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
start =  Word32 -> Word32 -> Word32 -> Block
B Word32
start Word32
end Word32
i
                | Word32
x Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<  Word32
end   =  Word32 -> Word32 -> Word32 -> Block
B Word32
x Word32
end Word32
i
                | Bool
otherwise  =  Word32 -> Block
f (Word32
iWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
1)


        unfoldSeqRev :: Int -> TwoBitSequence' Bidirectional
        unfoldSeqRev :: Int -> TwoBitSequence' Bidirectional
unfoldSeqRev Int
chroff = Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Bidirectional
unfoldSeqRev' ((Word32 -> Block) -> Word32 -> Block
forall {t}. Integral t => (t -> Block) -> t -> Block
search Word32 -> Block
n_block Word32
nBlockCount) ((Word32 -> Block) -> Word32 -> Block
forall {t}. Integral t => (t -> Block) -> t -> Block
search Word32 -> Block
m_block Word32
mBlockCount)
                                            (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
chroff) (Word32
packedDnaOff Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
4 Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
chroff)
          where
            trim :: Block -> Block
trim Block
b = Block
b { end_offset = min (fromIntegral chroff) (end_offset b) }

            -- finds the largest index such that the block start is smaller than chroff
            search :: (t -> Block) -> t -> Block
search t -> Block
f t
num  =  Block -> Block
trim (Block -> Block) -> (t -> Block) -> t -> Block
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t -> Block
f (t -> Block) -> t -> Block
forall a b. (a -> b) -> a -> b
$ t -> t -> t
go t
0 (t
numt -> t -> t
forall a. Num a => a -> a -> a
+t
1)
              where
                go :: t -> t -> t
go t
a t
b | t
a t -> t -> Bool
forall a. Eq a => a -> a -> Bool
== t
b                                    =  t
a
                       | Block -> Word32
start_offset (t -> Block
f t
m) Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
chroff  =  t -> t -> t
go t
m t
b
                       | Bool
otherwise                                 =  t -> t -> t
go t
a (t
mt -> t -> t
forall a. Num a => a -> a -> a
-t
1)
                  where
                    m :: t
m = t -> t -> t
forall a. Integral a => a -> a -> a
div (t
a t -> t -> t
forall a. Num a => a -> a -> a
+ t
b t -> t -> t
forall a. Num a => a -> a -> a
+ t
1) t
2

        unfoldSeqRev' :: Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Bidirectional
        unfoldSeqRev' :: Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Bidirectional
unfoldSeqRev' nb :: Block
nb@(B Word32
nstart Word32
nend Word32
_) mb :: Block
mb@(B Word32
mstart Word32
mend Word32
_) !Word32
chroff !Word32
fileoff
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word32
0                     =  TwoBitSequence' Bidirectional
forall dir. TwoBitSequence' dir
RefEnd
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
mend Bool -> Bool -> Bool
|| Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
< Word32
nend  =  TwoBitError -> TwoBitSequence' Bidirectional
forall a e. Exception e => e -> a
throw (String -> TwoBitError
UnsortedBlocks String
fp0)
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
nend Bool -> Bool -> Bool
&& Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
mend  =  Masking -> Word32 -> TwoBitSequence' Bidirectional
advance Masking
noneMasked (Word32 -> TwoBitSequence' Bidirectional)
-> Word32 -> TwoBitSequence' Bidirectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
max Word32
nend Word32
mend
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
mend                   =  Masking -> Word32 -> TwoBitSequence' Bidirectional
advance Masking
hardMasked (Word32 -> TwoBitSequence' Bidirectional)
-> Word32 -> TwoBitSequence' Bidirectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
max Word32
nstart Word32
mend
            | Word32
chroff Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
> Word32
nend                   =  Masking -> Word32 -> TwoBitSequence' Bidirectional
advance Masking
softMasked (Word32 -> TwoBitSequence' Bidirectional)
-> Word32 -> TwoBitSequence' Bidirectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
max Word32
mstart Word32
nend
            | Bool
otherwise                       =  Masking -> Word32 -> TwoBitSequence' Bidirectional
advance Masking
bothMasked (Word32 -> TwoBitSequence' Bidirectional)
-> Word32 -> TwoBitSequence' Bidirectional
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> Word32
forall a. Ord a => a -> a -> a
max Word32
nstart Word32
mstart
          where
            advance :: Masking -> Word32 -> TwoBitSequence' Bidirectional
advance Masking
m Word32
x = Masking
-> ForeignPtr Word8
-> Word
-> Int
-> TwoBitSequence' Bidirectional
-> TwoBitSequence' Bidirectional
forall dir.
Masking
-> ForeignPtr Word8
-> Word
-> Int
-> TwoBitSequence' dir
-> TwoBitSequence' dir
SomeSeq Masking
m ForeignPtr Word8
raw (Word32 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
fileoff) (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
x Int -> Int -> Int
forall a. Num a => a -> a -> a
- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
chroff) (TwoBitSequence' Bidirectional -> TwoBitSequence' Bidirectional)
-> TwoBitSequence' Bidirectional -> TwoBitSequence' Bidirectional
forall a b. (a -> b) -> a -> b
$
                          Block -> Block -> Word32 -> Word32 -> TwoBitSequence' Bidirectional
unfoldSeqRev' (Word32 -> (Word32 -> Block) -> Block -> Block
advanceB Word32
x Word32 -> Block
n_block Block
nb) (Word32 -> (Word32 -> Block) -> Block -> Block
advanceB Word32
x Word32 -> Block
m_block Block
mb) Word32
x (Word32
fileoffWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+Word32
xWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
-Word32
chroff)

            advanceB :: Word32 -> (Word32 -> Block) -> Block -> Block
            advanceB :: Word32 -> (Word32 -> Block) -> Block -> Block
advanceB Word32
x Word32 -> Block
f (B Word32
start Word32
end Word32
i)
                | Word32
x Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word32
end    =  Word32 -> Word32 -> Word32 -> Block
B Word32
start Word32
end Word32
i
                | Word32
x Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>  Word32
start  =  Word32 -> Word32 -> Word32 -> Block
B Word32
start Word32
x Word32
i
                | Bool
otherwise   =  Word32 -> Block
f (Word32
iWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
-Word32
1)


data Block = B { Block -> Word32
start_offset :: !Word32
               , Block -> Word32
end_offset   :: !Word32
               , Block -> Word32
block_number :: !Word32 }
  deriving (Int -> Block -> ShowS
[Block] -> ShowS
Block -> String
(Int -> Block -> ShowS)
-> (Block -> String) -> ([Block] -> ShowS) -> Show Block
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Block -> ShowS
showsPrec :: Int -> Block -> ShowS
$cshow :: Block -> String
show :: Block -> String
$cshowList :: [Block] -> ShowS
showList :: [Block] -> ShowS
Show, Block -> Block -> Bool
(Block -> Block -> Bool) -> (Block -> Block -> Bool) -> Eq Block
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Block -> Block -> Bool
== :: Block -> Block -> Bool
$c/= :: Block -> Block -> Bool
/= :: Block -> Block -> Bool
Eq, Eq Block
Eq Block =>
(Block -> Block -> Ordering)
-> (Block -> Block -> Bool)
-> (Block -> Block -> Bool)
-> (Block -> Block -> Bool)
-> (Block -> Block -> Bool)
-> (Block -> Block -> Block)
-> (Block -> Block -> Block)
-> Ord Block
Block -> Block -> Bool
Block -> Block -> Ordering
Block -> Block -> Block
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Block -> Block -> Ordering
compare :: Block -> Block -> Ordering
$c< :: Block -> Block -> Bool
< :: Block -> Block -> Bool
$c<= :: Block -> Block -> Bool
<= :: Block -> Block -> Bool
$c> :: Block -> Block -> Bool
> :: Block -> Block -> Bool
$c>= :: Block -> Block -> Bool
>= :: Block -> Block -> Bool
$cmax :: Block -> Block -> Block
max :: Block -> Block -> Block
$cmin :: Block -> Block -> Block
min :: Block -> Block -> Block
Ord)


-- | 2bit supports two kinds of masking, typically rendered as lowercase
-- letters ('MaskSoft') and Ns ('MaskHard').  They can overlap
-- ('MaskBoth'), and even the hard masking has underlying sequence
-- (which is normally ignored).
newtype Masking = Masking Word8 deriving (Masking -> Masking -> Bool
(Masking -> Masking -> Bool)
-> (Masking -> Masking -> Bool) -> Eq Masking
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Masking -> Masking -> Bool
== :: Masking -> Masking -> Bool
$c/= :: Masking -> Masking -> Bool
/= :: Masking -> Masking -> Bool
Eq, Eq Masking
Eq Masking =>
(Masking -> Masking -> Ordering)
-> (Masking -> Masking -> Bool)
-> (Masking -> Masking -> Bool)
-> (Masking -> Masking -> Bool)
-> (Masking -> Masking -> Bool)
-> (Masking -> Masking -> Masking)
-> (Masking -> Masking -> Masking)
-> Ord Masking
Masking -> Masking -> Bool
Masking -> Masking -> Ordering
Masking -> Masking -> Masking
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Masking -> Masking -> Ordering
compare :: Masking -> Masking -> Ordering
$c< :: Masking -> Masking -> Bool
< :: Masking -> Masking -> Bool
$c<= :: Masking -> Masking -> Bool
<= :: Masking -> Masking -> Bool
$c> :: Masking -> Masking -> Bool
> :: Masking -> Masking -> Bool
$c>= :: Masking -> Masking -> Bool
>= :: Masking -> Masking -> Bool
$cmax :: Masking -> Masking -> Masking
max :: Masking -> Masking -> Masking
$cmin :: Masking -> Masking -> Masking
min :: Masking -> Masking -> Masking
Ord)

instance Show Masking where
    show :: Masking -> String
show (Masking Word8
0) = String
"None"
    show (Masking Word8
1) = String
"Soft"
    show (Masking Word8
2) = String
"Hard"
    show (Masking Word8
_) = String
"Both"

instance Read Masking where
    readsPrec :: Int -> ReadS Masking
readsPrec Int
_ String
s = [ (Word8 -> Masking
Masking Word8
m,String
s') | (String
w,String
s') <- ReadS String
lex String
s
                                     , Word8
m <- case String
w of String
"None" -> [Word8
Item [Word8]
0]
                                                      String
"Soft" -> [Word8
Item [Word8]
1]
                                                      String
"Hard" -> [Word8
Item [Word8]
2]
                                                      String
"Both" -> [Word8
Item [Word8]
3]
                                                      String
_      -> [ ] ]

instance Semigroup Masking where
    Masking Word8
a <> :: Masking -> Masking -> Masking
<> Masking Word8
b = Word8 -> Masking
Masking (Word8
a Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8
b)

instance Monoid Masking where
    mempty :: Masking
mempty = Word8 -> Masking
Masking Word8
0
    mappend :: Masking -> Masking -> Masking
mappend = Masking -> Masking -> Masking
forall a. Semigroup a => a -> a -> a
(<>)

instance Enum Masking where
    toEnum :: Int -> Masking
toEnum = Word8 -> Masking
Masking (Word8 -> Masking) -> (Int -> Word8) -> Int -> Masking
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8
forall a. Enum a => Int -> a
toEnum
    fromEnum :: Masking -> Int
fromEnum (Masking Word8
m) = Word8 -> Int
forall a. Enum a => a -> Int
fromEnum Word8
m

instance Bounded Masking where
    minBound :: Masking
minBound = Word8 -> Masking
Masking Word8
0
    maxBound :: Masking
maxBound = Word8 -> Masking
Masking Word8
3


isSoftMasked, isHardMasked :: Masking -> Bool
isSoftMasked :: Masking -> Bool
isSoftMasked (Masking Word8
m) = Word8 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word8
m Int
0
isHardMasked :: Masking -> Bool
isHardMasked (Masking Word8
m) = Word8 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word8
m Int
1

noneMasked, softMasked, hardMasked, bothMasked :: Masking
noneMasked :: Masking
noneMasked = Word8 -> Masking
Masking Word8
0
softMasked :: Masking
softMasked = Word8 -> Masking
Masking Word8
1
hardMasked :: Masking
hardMasked = Word8 -> Masking
Masking Word8
2
bothMasked :: Masking
bothMasked = Word8 -> Masking
Masking Word8
3


-- | This is a (piece of a) reference sequence.  It consists of
-- stretches with uniform masking.
--
-- The offset is stored as a 'Word'.  This is done because on a 32 bit
-- platform, every bit counts.  This limits the genome to approximately
-- four gigabases, which would be a file of about one gigabyte.  That's
-- just about enough to work with the human genome.  On a 64 bit
-- platform, the file format itself imposes a limit of four gigabytes,
-- or about 16 gigabases in total.
--
-- If length is zero, the piece is empty and the mask, pointer, and
-- offset fields may not be valid.  If length is positive, ptr+offset
-- points at the first base of the piece.  If length is negative,
-- ptr+offset points just past the end of the piece, ptr+offset+length
-- points to the first base of the piece, and the sequence in meant to
-- be reverse complemented.
--
-- In a 'TwoBitSequence', length must not be negative.  In a
-- @TwoBitSequence' Bidirectional@, length can be positive or negative.

data TwoBitSequence' dir = SomeSeq {-# UNPACK #-} !Masking               -- ^ how is it masked?
                                   {-# UNPACK #-} !(ForeignPtr Word8)    -- ^ primitive bases in 2bit encoding:  [0..3] = TCAG
                                   {-# UNPACK #-} !Word                  -- ^ offset in bases(!)
                                   {-# UNPACK #-} !Int                   -- ^ length in bases
                                   (TwoBitSequence' dir)
                         | RefEnd

data Unidrectional
data Bidirectional

type TwoBitSequence = TwoBitSequence' Unidrectional

instance Show (TwoBitSequence' dir) where
    showsPrec :: Int -> TwoBitSequence' dir -> ShowS
showsPrec Int
_ (SomeSeq Masking
m ForeignPtr Word8
_ Word
_ Int
l TwoBitSequence' dir
r) = String -> ShowS
forall a. [a] -> [a] -> [a]
(++) String
"SomeSeq " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Masking -> ShowS
forall a. Show a => a -> ShowS
shows Masking
m ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:) Char
' ' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ShowS
forall a. Show a => a -> ShowS
shows Int
l ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
forall a. [a] -> [a] -> [a]
(++) String
" $ " ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TwoBitSequence' dir -> ShowS
forall a. Show a => a -> ShowS
shows TwoBitSequence' dir
r
    showsPrec Int
_  TwoBitSequence' dir
RefEnd             = String -> ShowS
forall a. [a] -> [a] -> [a]
(++) String
"RefEnd"

-- | Unpacks a reference sequence into a (very long) list of bytes.
-- Each byte contains the nucleotide in bits 0 and 1 with valjues 0..3
-- corresponding to "TCAG", and the soft and hard mask bits in bits 2
-- and 3, respectively.

unpackRSRaw :: TwoBitSequence' dir -> [Word8]
unpackRSRaw :: forall dir. TwoBitSequence' dir -> [Word8]
unpackRSRaw TwoBitSequence' dir
rs = (forall b. (Word8 -> b -> b) -> b -> b) -> [Word8]
forall a. (forall b. (a -> b -> b) -> b -> b) -> [a]
build (\Word8 -> b -> b
c b
n -> (Word8 -> b -> b) -> b -> TwoBitSequence' dir -> b
forall b dir. (Word8 -> b -> b) -> b -> TwoBitSequence' dir -> b
unpackRSFB Word8 -> b -> b
c b
n TwoBitSequence' dir
rs)
{-# INLINE unpackRSRaw #-}

unpackRSFB :: (Word8 -> b -> b) -> b -> TwoBitSequence' dir -> b
unpackRSFB :: forall b dir. (Word8 -> b -> b) -> b -> TwoBitSequence' dir -> b
unpackRSFB Word8 -> b -> b
cons b
nil  =  TwoBitSequence' dir -> b
forall {dir}. TwoBitSequence' dir -> b
go0
  where
    go0 :: TwoBitSequence' dir -> b
go0  TwoBitSequence' dir
RefEnd                                               =  b
nil
    go0 (SomeSeq (Masking Word8
msk) ForeignPtr Word8
raw Word
off0 Int
len0 TwoBitSequence' dir
rs) | Int
len0 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0  =  Word -> Int -> b
forall {t} {t}. (Integral t, Bits t, Num t, Eq t) => t -> t -> b
go Word
off0 Int
len0
      where
        go :: t -> t -> b
go !t
off !t
len  =  if t
len t -> t -> Bool
forall a. Eq a => a -> a -> Bool
== t
0 then TwoBitSequence' dir -> b
go0 TwoBitSequence' dir
rs else Word8
code Word8 -> b -> b
`cons` t -> t -> b
go (t
offt -> t -> t
forall a. Num a => a -> a -> a
+t
1) (t
lent -> t -> t
forall a. Num a => a -> a -> a
-t
1)
          where
            !byteoff :: Int
byteoff = t -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (t -> Int) -> t -> Int
forall a b. (a -> b) -> a -> b
$ t
off t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftR` Int
2
            !bitoff :: Int
bitoff  = t -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (t -> Int) -> t -> Int
forall a b. (a -> b) -> a -> b
$ t
off t -> t -> t
forall a. Bits a => a -> a -> a
.&. t
3
            !byte :: Word8
byte    = IO Word8 -> Word8
forall a. IO a -> a
unsafeInlineIO (IO Word8 -> Word8) -> IO Word8 -> Word8
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> (Ptr Word8 -> IO Word8) -> IO Word8
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
raw (Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
`peekByteOff` Int
byteoff)
            !code :: Word8
code    = Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftR Word8
byte (Int
6 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bitoff ) Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
3 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftL Word8
msk Int
2

    go0 (SomeSeq (Masking Word8
msk) ForeignPtr Word8
raw Word
off0 Int
len0 TwoBitSequence' dir
rs)              =  Word -> Int -> b
forall {t} {t}. (Integral t, Bits t, Num t, Eq t) => t -> t -> b
go Word
off0 (-Int
len0)
      where
        go :: t -> t -> b
go !t
off !t
len  =  if t
len t -> t -> Bool
forall a. Eq a => a -> a -> Bool
== t
0 then TwoBitSequence' dir -> b
go0 TwoBitSequence' dir
rs else Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
xor Word8
2 Word8
code Word8 -> b -> b
`cons` t -> t -> b
go (t
offt -> t -> t
forall a. Num a => a -> a -> a
-t
1) (t
lent -> t -> t
forall a. Num a => a -> a -> a
-t
1)
          where
            !byteoff :: Int
byteoff = t -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (t -> Int) -> t -> Int
forall a b. (a -> b) -> a -> b
$ (t
offt -> t -> t
forall a. Num a => a -> a -> a
-t
1) t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftR` Int
2
            !bitoff :: Int
bitoff  = t -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (t -> Int) -> t -> Int
forall a b. (a -> b) -> a -> b
$ (t
offt -> t -> t
forall a. Num a => a -> a -> a
-t
1) t -> t -> t
forall a. Bits a => a -> a -> a
.&. t
3
            !byte :: Word8
byte    = IO Word8 -> Word8
forall a. IO a -> a
unsafeInlineIO (IO Word8 -> Word8) -> IO Word8 -> Word8
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> (Ptr Word8 -> IO Word8) -> IO Word8
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
raw (Ptr Word8 -> Int -> IO Word8
forall b. Ptr b -> Int -> IO Word8
forall a b. Storable a => Ptr b -> Int -> IO a
`peekByteOff` Int
byteoff)
            !code :: Word8
code    = Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftR Word8
byte (Int
6 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
bitoff ) Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
3 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftL Word8
msk Int
2
{-# INLINE [0] unpackRSFB #-}

-- | Unpacks a reference sequence into a (very long) list of ASCII
-- characters.  Hard masked nucleotides become the letter 'N', others
-- become "TCAG".
unpackRS :: TwoBitSequence' dir -> [Word8]
unpackRS :: forall dir. TwoBitSequence' dir -> [Word8]
unpackRS  =  (Word8 -> Word8) -> [Word8] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (PrimArray Word8 -> Int -> Word8
forall a. Prim a => PrimArray a -> Int -> a
indexPrimArray PrimArray Word8
chars (Int -> Word8) -> (Word8 -> Int) -> Word8 -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral) ([Word8] -> [Word8])
-> (TwoBitSequence' dir -> [Word8])
-> TwoBitSequence' dir
-> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TwoBitSequence' dir -> [Word8]
forall dir. TwoBitSequence' dir -> [Word8]
unpackRSRaw
  where
    !chars :: PrimArray Word8
chars = [Item (PrimArray Word8)
84,Item (PrimArray Word8)
67,Item (PrimArray Word8)
65,Item (PrimArray Word8)
71,Item (PrimArray Word8)
84,Item (PrimArray Word8)
67,Item (PrimArray Word8)
65,Item (PrimArray Word8)
71,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78]  -- "TCAGTCAGNNNNNNNN"
{-# INLINE unpackRS #-}

-- | Unpacks a reference sequence into a list of ASCII characters,
-- interpreting masking in the customary way.  Specifically, hard
-- masking produces Ns, soft masking produces lower case letters, and
-- dual masking produces lower case Ns.
unpackRSMasked :: TwoBitSequence' dir -> [Word8]
unpackRSMasked :: forall dir. TwoBitSequence' dir -> [Word8]
unpackRSMasked  =  (Word8 -> Word8) -> [Word8] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (PrimArray Word8 -> Int -> Word8
forall a. Prim a => PrimArray a -> Int -> a
indexPrimArray PrimArray Word8
chars (Int -> Word8) -> (Word8 -> Int) -> Word8 -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral) ([Word8] -> [Word8])
-> (TwoBitSequence' dir -> [Word8])
-> TwoBitSequence' dir
-> [Word8]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TwoBitSequence' dir -> [Word8]
forall dir. TwoBitSequence' dir -> [Word8]
unpackRSRaw
  where
    !chars :: PrimArray Word8
chars = [Item (PrimArray Word8)
84,Item (PrimArray Word8)
67,Item (PrimArray Word8)
65,Item (PrimArray Word8)
71,Item (PrimArray Word8)
116,Item (PrimArray Word8)
99,Item (PrimArray Word8)
97,Item (PrimArray Word8)
103,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
78,Item (PrimArray Word8)
110,Item (PrimArray Word8)
110,Item (PrimArray Word8)
110,Item (PrimArray Word8)
110]  -- "TCAGtcagNNNNnnnn"
{-# INLINE unpackRSMasked #-}


-- | Reads a 32 bit word from an address, which doesn't need to be
-- aligned.  The byte order used is unspecified.
peekUnalnWord32 :: Ptr a -> IO Word32

-- | Equivalent to peekUnalnWord32 followed by a byte swap.
peekUnalnWord32Swap :: Ptr a -> IO Word32


-- List of known architectures that efficiently support unaligned accesses.
#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
    || defined(powerpc64le_HOST_ARCH) || ((defined(arm_HOST_ARCH) \
    || defined(aarch64_HOST_ARCH)) && defined(__ARM_FEATURE_UNALIGNED)) \
    || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)

peekUnalnWord32 :: forall a. Ptr a -> IO Word32
peekUnalnWord32 = Ptr Word32 -> IO Word32
forall a. Storable a => Ptr a -> IO a
peek (Ptr Word32 -> IO Word32)
-> (Ptr a -> Ptr Word32) -> Ptr a -> IO Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr a -> Ptr Word32
forall a b. Ptr a -> Ptr b
castPtr
peekUnalnWord32Swap :: forall a. Ptr a -> IO Word32
peekUnalnWord32Swap = (Word32 -> Word32) -> IO Word32 -> IO Word32
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word32 -> Word32
byteSwap32 (IO Word32 -> IO Word32)
-> (Ptr a -> IO Word32) -> Ptr a -> IO Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr Word32 -> IO Word32
forall a. Storable a => Ptr a -> IO a
peek (Ptr Word32 -> IO Word32)
-> (Ptr a -> Ptr Word32) -> Ptr a -> IO Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ptr a -> Ptr Word32
forall a b. Ptr a -> Ptr b
castPtr

#else

peekUnalnWord32 p = do
    x <- fromIntegral <$> peekWord8 (plusPtr p 0)
    y <- fromIntegral <$> peekWord8 (plusPtr p 1)
    z <- fromIntegral <$> peekWord8 (plusPtr p 2)
    w <- fromIntegral <$> peekWord8 (plusPtr p 3)
    return $! x .|. unsafeShiftL y 8 .|. unsafeShiftL z 16 .|. unsafeShiftL w 24

peekUnalnWord32Swap p = do
    x <- fromIntegral <$> peekWord8 (plusPtr p 0)
    y <- fromIntegral <$> peekWord8 (plusPtr p 1)
    z <- fromIntegral <$> peekWord8 (plusPtr p 2)
    w <- fromIntegral <$> peekWord8 (plusPtr p 3)
    return $! w .|. unsafeShiftL z 8 .|. unsafeShiftL y 16 .|. unsafeShiftL x 24

peekWord8 :: Ptr a -> IO Word8
peekWord8 = peek . castPtr

#endif