{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE OverloadedStrings     #-}

module Bio.Data.Bed
    ( BEDLike(..)
    , BEDConvert(..)
    , BED(..)
    , BED3(..)
    , BEDGraph(..)
    , bdgValue
    , NarrowPeak(..)
    , npSignal
    , npPvalue
    , npQvalue
    , npPeak
    , BEDExt(..)
    , _bed
    , _data

    , BEDTree
    , bedToTree
    , sortedBedToTree
    , queryIntersect
    , intersecting
    , isIntersected
    , sizeOverlapped
    , splitBed
    , splitBedBySize
    , splitBedBySizeLeft
    , splitBedBySizeOverlap
    , sortBed
    , intersectBed
    , intersectBedWith
    , intersectSortedBed
    , intersectSortedBedWith
    , isOverlapped
    , mergeBed
    , mergeBedWith
    , mergeSortedBed
    , mergeSortedBedWith
--    , splitOverlapped
    , countOverlapped

    -- * IO
    , streamBed
    , streamBedGzip
    , readBed
    , sinkFileBed
    , sinkFileBedGzip
    , sinkHandleBed
    , writeBed

    , compareBed
    ) where

import           Conduit
import           Control.Arrow                ((***), first)
import           Lens.Micro
import qualified Data.ByteString.Char8        as B
import qualified Data.Foldable                as F
import           Data.Function                (on)
import qualified Data.HashMap.Strict          as M
import qualified Data.IntervalMap.Strict      as IM
import           Data.List                    (groupBy, sortBy, group)
import           Data.Conduit.Zlib           (gzip, ungzip, multiple)
import qualified Data.Vector                  as V
import qualified Data.Vector.Algorithms.Intro as I
import           System.IO

import           Bio.Data.Bed.Types
import           Bio.Utils.Misc               (binBySize, binBySizeLeft,
                                               binBySizeOverlap, bins)

-- | Convert a set of sorted bed records to interval tree, with combining
-- function for equal keys.
sortedBedToTree :: (BEDLike b, F.Foldable f)
                => (a -> a -> a)
                -> Sorted (f (b, a))
                -> BEDTree a
sortedBedToTree :: (a -> a -> a) -> Sorted (f (b, a)) -> BEDTree a
sortedBedToTree a -> a -> a
f (Sorted f (b, a)
xs) = [(ByteString, IntervalMap Int a)] -> BEDTree a
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList ([(ByteString, IntervalMap Int a)] -> BEDTree a)
-> [(ByteString, IntervalMap Int a)] -> BEDTree a
forall a b. (a -> b) -> a -> b
$
    ([(ByteString, (Interval Int, a))]
 -> (ByteString, IntervalMap Int a))
-> [[(ByteString, (Interval Int, a))]]
-> [(ByteString, IntervalMap Int a)]
forall a b. (a -> b) -> [a] -> [b]
map (([ByteString] -> ByteString
forall a. [a] -> a
head ([ByteString] -> ByteString)
-> ([(Interval Int, a)] -> IntervalMap Int a)
-> ([ByteString], [(Interval Int, a)])
-> (ByteString, IntervalMap Int a)
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** (a -> a -> a) -> [(Interval Int, a)] -> IntervalMap Int a
forall k e a.
(Interval k e, Eq k) =>
(a -> a -> a) -> [(k, a)] -> IntervalMap k a
IM.fromAscListWith a -> a -> a
f) (([ByteString], [(Interval Int, a)])
 -> (ByteString, IntervalMap Int a))
-> ([(ByteString, (Interval Int, a))]
    -> ([ByteString], [(Interval Int, a)]))
-> [(ByteString, (Interval Int, a))]
-> (ByteString, IntervalMap Int a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(ByteString, (Interval Int, a))]
-> ([ByteString], [(Interval Int, a)])
forall a b. [(a, b)] -> ([a], [b])
unzip) ([[(ByteString, (Interval Int, a))]]
 -> [(ByteString, IntervalMap Int a)])
-> [[(ByteString, (Interval Int, a))]]
-> [(ByteString, IntervalMap Int a)]
forall a b. (a -> b) -> a -> b
$ ((ByteString, (Interval Int, a))
 -> (ByteString, (Interval Int, a)) -> Bool)
-> [(ByteString, (Interval Int, a))]
-> [[(ByteString, (Interval Int, a))]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
(==) (ByteString -> ByteString -> Bool)
-> ((ByteString, (Interval Int, a)) -> ByteString)
-> (ByteString, (Interval Int, a))
-> (ByteString, (Interval Int, a))
-> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (ByteString, (Interval Int, a)) -> ByteString
forall a b. (a, b) -> a
fst) ([(ByteString, (Interval Int, a))]
 -> [[(ByteString, (Interval Int, a))]])
-> [(ByteString, (Interval Int, a))]
-> [[(ByteString, (Interval Int, a))]]
forall a b. (a -> b) -> a -> b
$
    ((b, a) -> (ByteString, (Interval Int, a)))
-> [(b, a)] -> [(ByteString, (Interval Int, a))]
forall a b. (a -> b) -> [a] -> [b]
map (\(b
b, a
x) -> (b
bb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, (Int -> Int -> Interval Int
forall a. a -> a -> Interval a
IM.IntervalCO (b
bb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart) (b
bb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd), a
x))) ([(b, a)] -> [(ByteString, (Interval Int, a))])
-> [(b, a)] -> [(ByteString, (Interval Int, a))]
forall a b. (a -> b) -> a -> b
$
    f (b, a) -> [(b, a)]
forall (t :: * -> *) a. Foldable t => t a -> [a]
F.toList f (b, a)
xs
{-# INLINE sortedBedToTree #-}

bedToTree :: BEDLike b
          => (a -> a -> a)
          -> [(b, a)]
          -> BEDTree a
bedToTree :: (a -> a -> a) -> [(b, a)] -> BEDTree a
bedToTree a -> a -> a
f [(b, a)]
xs = [(ByteString, IntervalMap Int a)] -> BEDTree a
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList ([(ByteString, IntervalMap Int a)] -> BEDTree a)
-> [(ByteString, IntervalMap Int a)] -> BEDTree a
forall a b. (a -> b) -> a -> b
$ ([(ByteString, (Interval Int, a))]
 -> (ByteString, IntervalMap Int a))
-> [[(ByteString, (Interval Int, a))]]
-> [(ByteString, IntervalMap Int a)]
forall a b. (a -> b) -> [a] -> [b]
map (([ByteString] -> ByteString
forall a. [a] -> a
head ([ByteString] -> ByteString)
-> ([(Interval Int, a)] -> IntervalMap Int a)
-> ([ByteString], [(Interval Int, a)])
-> (ByteString, IntervalMap Int a)
forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** (a -> a -> a) -> [(Interval Int, a)] -> IntervalMap Int a
forall k e a.
(Interval k e, Eq k) =>
(a -> a -> a) -> [(k, a)] -> IntervalMap k a
IM.fromAscListWith a -> a -> a
f) (([ByteString], [(Interval Int, a)])
 -> (ByteString, IntervalMap Int a))
-> ([(ByteString, (Interval Int, a))]
    -> ([ByteString], [(Interval Int, a)]))
-> [(ByteString, (Interval Int, a))]
-> (ByteString, IntervalMap Int a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(ByteString, (Interval Int, a))]
-> ([ByteString], [(Interval Int, a)])
forall a b. [(a, b)] -> ([a], [b])
unzip) ([[(ByteString, (Interval Int, a))]]
 -> [(ByteString, IntervalMap Int a)])
-> [[(ByteString, (Interval Int, a))]]
-> [(ByteString, IntervalMap Int a)]
forall a b. (a -> b) -> a -> b
$
    ((ByteString, (Interval Int, a))
 -> (ByteString, (Interval Int, a)) -> Bool)
-> [(ByteString, (Interval Int, a))]
-> [[(ByteString, (Interval Int, a))]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
(==) (ByteString -> ByteString -> Bool)
-> ((ByteString, (Interval Int, a)) -> ByteString)
-> (ByteString, (Interval Int, a))
-> (ByteString, (Interval Int, a))
-> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (ByteString, (Interval Int, a)) -> ByteString
forall a b. (a, b) -> a
fst) ([(ByteString, (Interval Int, a))]
 -> [[(ByteString, (Interval Int, a))]])
-> [(ByteString, (Interval Int, a))]
-> [[(ByteString, (Interval Int, a))]]
forall a b. (a -> b) -> a -> b
$
    ((b, a) -> (ByteString, (Interval Int, a)))
-> [(b, a)] -> [(ByteString, (Interval Int, a))]
forall a b. (a -> b) -> [a] -> [b]
map (\(b
b, a
x) -> (b
bb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, (Int -> Int -> Interval Int
forall a. a -> a -> Interval a
IM.IntervalCO (b
bb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart) (b
bb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd), a
x))) ([(b, a)] -> [(ByteString, (Interval Int, a))])
-> [(b, a)] -> [(ByteString, (Interval Int, a))]
forall a b. (a -> b) -> a -> b
$
    Vector (b, a) -> [(b, a)]
forall a. Vector a -> [a]
V.toList (Vector (b, a) -> [(b, a)]) -> Vector (b, a) -> [(b, a)]
forall a b. (a -> b) -> a -> b
$ (forall s. ST s (MVector s (b, a))) -> Vector (b, a)
forall a. (forall s. ST s (MVector s a)) -> Vector a
V.create ((forall s. ST s (MVector s (b, a))) -> Vector (b, a))
-> (forall s. ST s (MVector s (b, a))) -> Vector (b, a)
forall a b. (a -> b) -> a -> b
$ do
        MVector s (b, a)
v <- Vector (b, a) -> ST s (MVector (PrimState (ST s)) (b, a))
forall (m :: * -> *) a.
PrimMonad m =>
Vector a -> m (MVector (PrimState m) a)
V.unsafeThaw (Vector (b, a) -> ST s (MVector (PrimState (ST s)) (b, a)))
-> Vector (b, a) -> ST s (MVector (PrimState (ST s)) (b, a))
forall a b. (a -> b) -> a -> b
$ [(b, a)] -> Vector (b, a)
forall a. [a] -> Vector a
V.fromList [(b, a)]
xs
        Comparison (b, a) -> MVector (PrimState (ST s)) (b, a) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
I.sortBy (b -> b -> Ordering
forall b1 b2. (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering
compareBed (b -> b -> Ordering) -> ((b, a) -> b) -> Comparison (b, a)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (b, a) -> b
forall a b. (a, b) -> a
fst) MVector s (b, a)
MVector (PrimState (ST s)) (b, a)
v
        MVector s (b, a) -> ST s (MVector s (b, a))
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s (b, a)
v
{-# INLINE bedToTree #-}

queryIntersect :: BEDLike b => b -> BEDTree a -> [(BED3, a)]
queryIntersect :: b -> BEDTree a -> [(BED3, a)]
queryIntersect b
x BEDTree a
tree = ((Interval Int, a) -> (BED3, a))
-> [(Interval Int, a)] -> [(BED3, a)]
forall a b. (a -> b) -> [a] -> [b]
map ((Interval Int -> BED3) -> (Interval Int, a) -> (BED3, a)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first Interval Int -> BED3
f) ([(Interval Int, a)] -> [(BED3, a)])
-> [(Interval Int, a)] -> [(BED3, a)]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) a -> [(Interval Int, a)]
forall k v. IntervalMap k v -> [(k, v)]
IM.assocs (IntervalMap (Interval Int) a -> [(Interval Int, a)])
-> IntervalMap (Interval Int) a -> [(Interval Int, a)]
forall a b. (a -> b) -> a -> b
$ BEDTree a -> b -> IntervalMap (Interval Int) a
forall b a. BEDLike b => BEDTree a -> b -> IntervalMap Int a
intersecting BEDTree a
tree b
x
  where
    f :: Interval Int -> BED3
f (IM.IntervalCO Int
lo Int
hi) = ByteString -> Int -> Int -> BED3
BED3 ByteString
chr Int
lo Int
hi
    f Interval Int
_ = BED3
forall a. HasCallStack => a
undefined
    chr :: ByteString
chr = b
xb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom
{-# INLINE queryIntersect #-}

intersecting :: BEDLike b => BEDTree a -> b -> IM.IntervalMap Int a
intersecting :: BEDTree a -> b -> IntervalMap Int a
intersecting BEDTree a
tree b
x = IntervalMap Int a -> Interval Int -> IntervalMap Int a
forall k e v.
Interval k e =>
IntervalMap k v -> k -> IntervalMap k v
IM.intersecting (IntervalMap Int a -> ByteString -> BEDTree a -> IntervalMap Int a
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault IntervalMap Int a
forall k v. IntervalMap k v
IM.empty (b
xb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) BEDTree a
tree) (Interval Int -> IntervalMap Int a)
-> Interval Int -> IntervalMap Int a
forall a b. (a -> b) -> a -> b
$
    Int -> Int -> Interval Int
forall a. a -> a -> Interval a
IM.IntervalCO (b
xb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart) (Int -> Interval Int) -> Int -> Interval Int
forall a b. (a -> b) -> a -> b
$ b
xb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd
{-# INLINE intersecting #-}

isIntersected :: BEDLike b => BEDTree a -> b -> Bool
isIntersected :: BEDTree a -> b -> Bool
isIntersected BEDTree a
tree = Bool -> Bool
not (Bool -> Bool) -> (b -> Bool) -> b -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IntervalMap (Interval Int) a -> Bool
forall k v. IntervalMap k v -> Bool
IM.null (IntervalMap (Interval Int) a -> Bool)
-> (b -> IntervalMap (Interval Int) a) -> b -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. BEDTree a -> b -> IntervalMap (Interval Int) a
forall b a. BEDLike b => BEDTree a -> b -> IntervalMap Int a
intersecting BEDTree a
tree
{-# INLINE isIntersected #-}

sizeOverlapped :: (BEDLike b1, BEDLike b2) => b1 -> b2 -> Int
sizeOverlapped :: b1 -> b2 -> Int
sizeOverlapped b1
b1 b2
b2 | b1
b1b1 -> Getting ByteString b1 ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b1 ByteString
forall b. BEDLike b => Lens' b ByteString
chrom ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= b2
b2b2 -> Getting ByteString b2 ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b2 ByteString
forall b. BEDLike b => Lens' b ByteString
chrom = Int
0
                     | Int
overlap Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Int
0
                     | Bool
otherwise = Int
overlap
  where
    overlap :: Int
overlap = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum [ b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromStart
                      , b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromStart
                      , b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromStart
                      , b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromStart ]

-- | split a bed region into k consecutive subregions, discarding leftovers
splitBed :: BEDConvert b => Int -> b -> [b]
splitBed :: Int -> b -> [b]
splitBed Int
k b
bed = ((Int, Int) -> b) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Int -> b) -> (Int, Int) -> b
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (ByteString -> Int -> Int -> b
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom))) ([(Int, Int)] -> [b]) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> a -> b
$
    Int -> (Int, Int) -> [(Int, Int)]
bins Int
k (b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd)
{-# INLINE splitBed #-}

-- | split a bed region into consecutive fixed size subregions, discarding leftovers
splitBedBySize :: BEDConvert b => Int -> b -> [b]
splitBedBySize :: Int -> b -> [b]
splitBedBySize Int
k b
bed = ((Int, Int) -> b) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Int -> b) -> (Int, Int) -> b
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (ByteString -> Int -> Int -> b
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom))) ([(Int, Int)] -> [b]) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> a -> b
$
    Int -> (Int, Int) -> [(Int, Int)]
binBySize Int
k (b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd)
{-# INLINE splitBedBySize #-}

-- | split a bed region into consecutive fixed size subregions, including leftovers
splitBedBySizeLeft :: BEDConvert b => Int -> b -> [b]
splitBedBySizeLeft :: Int -> b -> [b]
splitBedBySizeLeft Int
k b
bed = ((Int, Int) -> b) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Int -> b) -> (Int, Int) -> b
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (ByteString -> Int -> Int -> b
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom))) ([(Int, Int)] -> [b]) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> a -> b
$
    Int -> (Int, Int) -> [(Int, Int)]
binBySizeLeft Int
k (b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd)
{-# INLINE splitBedBySizeLeft #-}

splitBedBySizeOverlap :: BEDConvert b
                      => Int     -- ^ bin size
                      -> Int     -- ^ overlap size
                      -> b -> [b]
splitBedBySizeOverlap :: Int -> Int -> b -> [b]
splitBedBySizeOverlap Int
k Int
o b
bed = ((Int, Int) -> b) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> [a] -> [b]
map ((Int -> Int -> b) -> (Int, Int) -> b
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (ByteString -> Int -> Int -> b
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom))) ([(Int, Int)] -> [b]) -> [(Int, Int)] -> [b]
forall a b. (a -> b) -> a -> b
$
    Int -> Int -> (Int, Int) -> [(Int, Int)]
binBySizeOverlap Int
k Int
o (b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd)
{-# INLINE splitBedBySizeOverlap #-}

-- | Compare bed records using only the chromosome, start and end positions.
-- Unlike the ``compare'' from the Ord type class, this function can compare
-- different types of BED data types.
compareBed :: (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering
compareBed :: b1 -> b2 -> Ordering
compareBed b1
b1 b2
b2 = (ByteString, Int, Int) -> (ByteString, Int, Int) -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (b1
b1b1 -> Getting ByteString b1 ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b1 ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromStart, b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromEnd)
                           (b2
b2b2 -> Getting ByteString b2 ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b2 ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromStart, b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromEnd)
{-# INLINE compareBed #-}

-- | sort BED, first by chromosome (alphabetical order), then by chromStart, last by chromEnd
sortBed :: BEDLike b => [b] -> Sorted (V.Vector b)
sortBed :: [b] -> Sorted (Vector b)
sortBed [b]
beds = Vector b -> Sorted (Vector b)
forall b. b -> Sorted b
Sorted (Vector b -> Sorted (Vector b)) -> Vector b -> Sorted (Vector b)
forall a b. (a -> b) -> a -> b
$ (forall s. ST s (MVector s b)) -> Vector b
forall a. (forall s. ST s (MVector s a)) -> Vector a
V.create ((forall s. ST s (MVector s b)) -> Vector b)
-> (forall s. ST s (MVector s b)) -> Vector b
forall a b. (a -> b) -> a -> b
$ do
    MVector s b
v <- Vector b -> ST s (MVector s b)
forall (m :: * -> *) a.
PrimMonad m =>
Vector a -> m (MVector (PrimState m) a)
V.unsafeThaw (Vector b -> ST s (MVector s b))
-> ([b] -> Vector b) -> [b] -> ST s (MVector s b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [b] -> Vector b
forall a. [a] -> Vector a
V.fromList ([b] -> ST s (MVector s b)) -> [b] -> ST s (MVector s b)
forall a b. (a -> b) -> a -> b
$ [b]
beds
    Comparison b -> MVector (PrimState (ST s)) b -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
I.sortBy Comparison b
forall b1 b2. (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering
compareBed MVector s b
MVector (PrimState (ST s)) b
v
    MVector s b -> ST s (MVector s b)
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s b
v
{-# INLINE sortBed #-}

-- | return records in A that are overlapped with records in B
intersectBed :: (BEDLike b1, BEDLike b2, Monad m) => [b2] -> ConduitT b1 b1 m ()
intersectBed :: [b2] -> ConduitT b1 b1 m ()
intersectBed [b2]
b = Sorted (Vector b2) -> ConduitT b1 b1 m ()
forall b1 b2 (m :: * -> *).
(BEDLike b1, BEDLike b2, Monad m) =>
Sorted (Vector b2) -> ConduitT b1 b1 m ()
intersectSortedBed Sorted (Vector b2)
b'
  where
    b' :: Sorted (Vector b2)
b' = [b2] -> Sorted (Vector b2)
forall b. BEDLike b => [b] -> Sorted (Vector b)
sortBed [b2]
b
{-# INLINE intersectBed #-}

-- | return records in A that are overlapped with records in B
intersectSortedBed :: (BEDLike b1, BEDLike b2, Monad m)
                   => Sorted (V.Vector b2) -> ConduitT b1 b1 m ()
intersectSortedBed :: Sorted (Vector b2) -> ConduitT b1 b1 m ()
intersectSortedBed (Sorted Vector b2
b) = (b1 -> Bool) -> ConduitT b1 b1 m ()
forall (m :: * -> *) a. Monad m => (a -> Bool) -> ConduitT a a m ()
filterC (Bool -> Bool
not (Bool -> Bool) -> (b1 -> Bool) -> b1 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IntervalMap (Interval Int) () -> Bool
forall k v. IntervalMap k v -> Bool
IM.null (IntervalMap (Interval Int) () -> Bool)
-> (b1 -> IntervalMap (Interval Int) ()) -> b1 -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. BEDTree () -> b1 -> IntervalMap (Interval Int) ()
forall b a. BEDLike b => BEDTree a -> b -> IntervalMap Int a
intersecting BEDTree ()
tree)
  where
    tree :: BEDTree ()
tree = (() -> () -> ()) -> Sorted (Vector (b2, ())) -> BEDTree ()
forall b (f :: * -> *) a.
(BEDLike b, Foldable f) =>
(a -> a -> a) -> Sorted (f (b, a)) -> BEDTree a
sortedBedToTree (\()
_ ()
_ -> ()) (Sorted (Vector (b2, ())) -> BEDTree ())
-> (Vector (b2, ()) -> Sorted (Vector (b2, ())))
-> Vector (b2, ())
-> BEDTree ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (b2, ()) -> Sorted (Vector (b2, ()))
forall b. b -> Sorted b
Sorted (Vector (b2, ()) -> BEDTree ()) -> Vector (b2, ()) -> BEDTree ()
forall a b. (a -> b) -> a -> b
$ (b2 -> (b2, ())) -> Vector b2 -> Vector (b2, ())
forall a b. (a -> b) -> Vector a -> Vector b
V.map (\b2
x -> (b2
x,())) Vector b2
b
{-# INLINE intersectSortedBed #-}

intersectBedWith :: (BEDLike b1, BEDLike b2, Monad m)
                 => (b1 -> [b2] -> a)
                 -> [b2]
                 -> ConduitT b1 a m ()
intersectBedWith :: (b1 -> [b2] -> a) -> [b2] -> ConduitT b1 a m ()
intersectBedWith b1 -> [b2] -> a
fn = (b1 -> [b2] -> a) -> Sorted (Vector b2) -> ConduitT b1 a m ()
forall b1 b2 (m :: * -> *) a.
(BEDLike b1, BEDLike b2, Monad m) =>
(b1 -> [b2] -> a) -> Sorted (Vector b2) -> ConduitT b1 a m ()
intersectSortedBedWith b1 -> [b2] -> a
fn (Sorted (Vector b2) -> ConduitT b1 a m ())
-> ([b2] -> Sorted (Vector b2)) -> [b2] -> ConduitT b1 a m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [b2] -> Sorted (Vector b2)
forall b. BEDLike b => [b] -> Sorted (Vector b)
sortBed
{-# INLINE intersectBedWith #-}

intersectSortedBedWith :: (BEDLike b1, BEDLike b2, Monad m)
                       => (b1 -> [b2] -> a)
                       -> Sorted (V.Vector b2)
                       -> ConduitT b1 a m ()
intersectSortedBedWith :: (b1 -> [b2] -> a) -> Sorted (Vector b2) -> ConduitT b1 a m ()
intersectSortedBedWith b1 -> [b2] -> a
fn (Sorted Vector b2
b) = (b1 -> a) -> ConduitT b1 a m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC ((b1 -> a) -> ConduitT b1 a m ())
-> (b1 -> a) -> ConduitT b1 a m ()
forall a b. (a -> b) -> a -> b
$ \b1
input -> b1 -> [b2] -> a
fn b1
input
    ([b2] -> a) -> [b2] -> a
forall a b. (a -> b) -> a -> b
$ [[b2]] -> [b2]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[b2]] -> [b2]) -> [[b2]] -> [b2]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [b2] -> [[b2]]
forall k v. IntervalMap k v -> [v]
IM.elems (IntervalMap (Interval Int) [b2] -> [[b2]])
-> IntervalMap (Interval Int) [b2] -> [[b2]]
forall a b. (a -> b) -> a -> b
$ BEDTree [b2] -> b1 -> IntervalMap (Interval Int) [b2]
forall b a. BEDLike b => BEDTree a -> b -> IntervalMap Int a
intersecting BEDTree [b2]
tree b1
input
  where
    tree :: BEDTree [b2]
tree = ([b2] -> [b2] -> [b2])
-> Sorted (Vector (b2, [b2])) -> BEDTree [b2]
forall b (f :: * -> *) a.
(BEDLike b, Foldable f) =>
(a -> a -> a) -> Sorted (f (b, a)) -> BEDTree a
sortedBedToTree [b2] -> [b2] -> [b2]
forall a. [a] -> [a] -> [a]
(++) (Sorted (Vector (b2, [b2])) -> BEDTree [b2])
-> Sorted (Vector (b2, [b2])) -> BEDTree [b2]
forall a b. (a -> b) -> a -> b
$ Vector (b2, [b2]) -> Sorted (Vector (b2, [b2]))
forall b. b -> Sorted b
Sorted (Vector (b2, [b2]) -> Sorted (Vector (b2, [b2])))
-> Vector (b2, [b2]) -> Sorted (Vector (b2, [b2]))
forall a b. (a -> b) -> a -> b
$ (b2 -> (b2, [b2])) -> Vector b2 -> Vector (b2, [b2])
forall a b. (a -> b) -> Vector a -> Vector b
V.map (\b2
x -> (b2
x, [b2
x])) Vector b2
b
{-# INLINE intersectSortedBedWith #-}

isOverlapped :: (BEDLike b1, BEDLike b2) => b1 -> b2 -> Bool
isOverlapped :: b1 -> b2 -> Bool
isOverlapped b1
b1 b2
b2 = b1
b1b1 -> Getting ByteString b1 ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b1 ByteString
forall b. BEDLike b => Lens' b ByteString
chrom ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== b2
b2b2 -> Getting ByteString b2 ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b2 ByteString
forall b. BEDLike b => Lens' b ByteString
chrom Bool -> Bool -> Bool
&&
    Bool -> Bool
not (b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromStart Bool -> Bool -> Bool
|| b2
b2b2 -> Getting Int b2 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b2 Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= b1
b1b1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromStart)

-- | Merge overlapping regions.
mergeBed :: (BEDConvert b, Monad m) => [b] -> ConduitT i b m ()
mergeBed :: [b] -> ConduitT i b m ()
mergeBed [b]
xs = Vector b -> ConduitT i (Element (Vector b)) m ()
forall (m :: * -> *) mono i.
(Monad m, MonoFoldable mono) =>
mono -> ConduitT i (Element mono) m ()
yieldMany Vector b
xs' ConduitT i b m () -> ConduitM b b m () -> ConduitT i b m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM b b m ()
forall b (m :: * -> *).
(BEDConvert b, Monad m) =>
ConduitT b b m ()
mergeSortedBed
  where
    Sorted Vector b
xs' = [b] -> Sorted (Vector b)
forall b. BEDLike b => [b] -> Sorted (Vector b)
sortBed [b]
xs
{-# INLINE mergeBed #-}

-- | Merge overlapping regions according to a merging function.
mergeBedWith :: (BEDLike b, Monad m)
             => ([b] -> a) -> [b] -> ConduitT i a m ()
mergeBedWith :: ([b] -> a) -> [b] -> ConduitT i a m ()
mergeBedWith [b] -> a
f [b]
xs = Vector b -> ConduitT i (Element (Vector b)) m ()
forall (m :: * -> *) mono i.
(Monad m, MonoFoldable mono) =>
mono -> ConduitT i (Element mono) m ()
yieldMany Vector b
xs' ConduitT i b m () -> ConduitM b a m () -> ConduitT i a m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ([b] -> a) -> ConduitM b a m ()
forall b (m :: * -> *) a.
(BEDLike b, Monad m) =>
([b] -> a) -> ConduitT b a m ()
mergeSortedBedWith [b] -> a
f
  where
    Sorted Vector b
xs' = [b] -> Sorted (Vector b)
forall b. BEDLike b => [b] -> Sorted (Vector b)
sortBed [b]
xs
{-# INLINE mergeBedWith #-}

-- | Merge overlapping regions. The input stream must be sorted first.
mergeSortedBed :: (BEDConvert b, Monad m) => ConduitT b b m ()
mergeSortedBed :: ConduitT b b m ()
mergeSortedBed = ([b] -> b) -> ConduitT b b m ()
forall b (m :: * -> *) a.
(BEDLike b, Monad m) =>
([b] -> a) -> ConduitT b a m ()
mergeSortedBedWith [b] -> b
forall b s. (BEDConvert b, BEDLike s) => [s] -> b
f
  where
    f :: [s] -> b
f [s]
xs = ByteString -> Int -> Int -> b
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed ([s] -> s
forall a. [a] -> a
head [s]
xs s -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^. Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) Int
lo Int
hi
      where
        lo :: Int
lo = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (s -> Int) -> [s] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (s -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart) [s]
xs
        hi :: Int
hi = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (s -> Int) -> [s] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (s -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd) [s]
xs
{-# INLINE mergeSortedBed #-}

-- | Merge overlapping regions according to a merging function. The input
-- stream must be sorted first.
mergeSortedBedWith :: (BEDLike b, Monad m)
                   => ([b] -> a) -> ConduitT b a m ()
mergeSortedBedWith :: ([b] -> a) -> ConduitT b a m ()
mergeSortedBedWith [b] -> a
mergeFn = ConduitT b a m (Maybe b)
forall (m :: * -> *) a o. Monad m => ConduitT a o m (Maybe a)
headC ConduitT b a m (Maybe b)
-> (Maybe b -> ConduitT b a m ()) -> ConduitT b a m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ( ConduitT b a m ()
-> (b -> ConduitT b a m ()) -> Maybe b -> ConduitT b a m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ConduitT b a m ()
forall a. Monoid a => a
mempty ((b -> ConduitT b a m ()) -> Maybe b -> ConduitT b a m ())
-> (b -> ConduitT b a m ()) -> Maybe b -> ConduitT b a m ()
forall a b. (a -> b) -> a -> b
$ \b
b0 ->
    ((ByteString, Int, Int), [b]) -> ConduitT b a m ()
forall (m :: * -> *).
Monad m =>
((ByteString, Int, Int), [b]) -> ConduitT b a m ()
go ((b
b0b -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, b
b0b -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, b
b0b -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd), [b
b0]) )
  where
    go :: ((ByteString, Int, Int), [b]) -> ConduitT b a m ()
go ((ByteString
chr, Int
s, Int
e), [b]
acc) = ConduitT b a m (Maybe b)
forall (m :: * -> *) a o. Monad m => ConduitT a o m (Maybe a)
headC ConduitT b a m (Maybe b)
-> (Maybe b -> ConduitT b a m ()) -> ConduitT b a m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT b a m ()
-> (b -> ConduitT b a m ()) -> Maybe b -> ConduitT b a m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (a -> ConduitT b a m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (a -> ConduitT b a m ()) -> a -> ConduitT b a m ()
forall a b. (a -> b) -> a -> b
$ [b] -> a
mergeFn [b]
acc) b -> ConduitT b a m ()
f
      where
        f :: b -> ConduitT b a m ()
f b
bed | ByteString
chr ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= ByteString
chr' Bool -> Bool -> Bool
|| Int
s' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
e =
                    a -> ConduitT b a m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield ([b] -> a
mergeFn [b]
acc) ConduitT b a m () -> ConduitT b a m () -> ConduitT b a m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ((ByteString, Int, Int), [b]) -> ConduitT b a m ()
go ((ByteString
chr',Int
s',Int
e'), [b
bed])
              | Int
s' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s = [Char] -> ConduitT b a m ()
forall a. HasCallStack => [Char] -> a
error [Char]
"input stream is not sorted"
              | Int
e' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
e = ((ByteString, Int, Int), [b]) -> ConduitT b a m ()
go ((ByteString
chr',Int
s,Int
e'), b
bedb -> [b] -> [b]
forall a. a -> [a] -> [a]
:[b]
acc)
              | Bool
otherwise = ((ByteString, Int, Int), [b]) -> ConduitT b a m ()
go ((ByteString
chr,Int
s,Int
e), b
bedb -> [b] -> [b]
forall a. a -> [a] -> [a]
:[b]
acc)
          where
            chr' :: ByteString
chr' = b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom
            s' :: Int
s' = b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart
            e' :: Int
e' = b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd
{-# INLINE mergeSortedBedWith #-}

{-
-- | Split overlapped regions into non-overlapped regions. The input must be overlapped.
-- This function is usually used with `mergeBedWith`.
splitOverlapped :: BEDLike b => ([b] -> a) -> [b] -> [(BED3, a)]
splitOverlapped fun xs = filter ((>0) . size . fst) $
    evalState (F.foldrM f [] $ init anchors) $
        (\(a,b) -> (fromEither a, M.singleton (b^.chromStart, b^.chromEnd) b)) $
        last anchors
  where
    anchors = sortBy (comparing (fromEither . fst)) $ concatMap
        ( \x -> [(Left $ x^.chromStart, x), (Right $ x^.chromEnd, x)] ) xs
    f (i, x) acc = do
        (j, s) <- get
        let bed = (asBed chr (fromEither i) j, fun $ M.elems s)
            s' = case i of
                Left _  -> M.delete (x^.chromStart, x^.chromEnd) s
                Right _ -> M.insert (x^.chromStart, x^.chromEnd) x s
        put (fromEither i, s')
        return (bed:acc)
    fromEither (Left x)  = x
    fromEither (Right x) = x
    chr = head xs ^. chrom
{-# INLINE splitOverlapped #-}
-}

-- | Split overlapped regions into non-overlapped regions. The input must be overlapped.
-- This function is usually used with `mergeBedWith`.
countOverlapped :: BEDLike b => [b] -> [(BED3, Int)]
countOverlapped :: [b] -> [(BED3, Int)]
countOverlapped [b]
xs = [(BED3, Int)] -> [(BED3, Int)]
forall a. [a] -> [a]
reverse ([(BED3, Int)] -> [(BED3, Int)]) -> [(BED3, Int)] -> [(BED3, Int)]
forall a b. (a -> b) -> a -> b
$ (\(Int
_,Int
_,[(BED3, Int)]
x) -> [(BED3, Int)]
x) ((Int, Int, [(BED3, Int)]) -> [(BED3, Int)])
-> (Int, Int, [(BED3, Int)]) -> [(BED3, Int)]
forall a b. (a -> b) -> a -> b
$
    ((Int, Int, [(BED3, Int)])
 -> (Either Int Int, Int) -> (Int, Int, [(BED3, Int)]))
-> (Int, Int, [(BED3, Int)])
-> [(Either Int Int, Int)]
-> (Int, Int, [(BED3, Int)])
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
F.foldl' (Int, Int, [(BED3, Int)])
-> (Either Int Int, Int) -> (Int, Int, [(BED3, Int)])
forall b a.
(Num b, BEDConvert a) =>
(Int, b, [(a, b)]) -> (Either Int Int, b) -> (Int, b, [(a, b)])
go (Int
p0, Int
c0, []) ([(Either Int Int, Int)] -> (Int, Int, [(BED3, Int)]))
-> [(Either Int Int, Int)] -> (Int, Int, [(BED3, Int)])
forall a b. (a -> b) -> a -> b
$ [(Either Int Int, Int)] -> [(Either Int Int, Int)]
forall a. [a] -> [a]
tail [(Either Int Int, Int)]
anchors
  where
    (Left Int
p0, Int
c0) = [(Either Int Int, Int)] -> (Either Int Int, Int)
forall a. [a] -> a
head [(Either Int Int, Int)]
anchors
    go :: (Int, b, [(a, b)]) -> (Either Int Int, b) -> (Int, b, [(a, b)])
go (Int
x, b
accum, [(a, b)]
result) (Left Int
p, b
count)
        | Int
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
p = (Int
p, b
accum b -> b -> b
forall a. Num a => a -> a -> a
+ b
count, [(a, b)]
result)
        | Bool
otherwise = (Int
p, b
accum b -> b -> b
forall a. Num a => a -> a -> a
+ b
count, (ByteString -> Int -> Int -> a
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed ByteString
chr Int
x (Int -> a) -> Int -> a
forall a b. (a -> b) -> a -> b
$ Int
p, b
accum)(a, b) -> [(a, b)] -> [(a, b)]
forall a. a -> [a] -> [a]
:[(a, b)]
result)
    go (Int
x, b
accum, [(a, b)]
result) (Right Int
p, b
count) =
      (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1, b
accum b -> b -> b
forall a. Num a => a -> a -> a
- b
count, (ByteString -> Int -> Int -> a
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed ByteString
chr Int
x (Int -> a) -> Int -> a
forall a b. (a -> b) -> a -> b
$ Int
p Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, b
accum)(a, b) -> [(a, b)] -> [(a, b)]
forall a. a -> [a] -> [a]
:[(a, b)]
result)
    anchors :: [(Either Int Int, Int)]
anchors = ([Either Int Int] -> (Either Int Int, Int))
-> [[Either Int Int]] -> [(Either Int Int, Int)]
forall a b. (a -> b) -> [a] -> [b]
map (\[Either Int Int]
x -> ([Either Int Int] -> Either Int Int
forall a. [a] -> a
head [Either Int Int]
x, [Either Int Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Either Int Int]
x)) ([[Either Int Int]] -> [(Either Int Int, Int)])
-> [[Either Int Int]] -> [(Either Int Int, Int)]
forall a b. (a -> b) -> a -> b
$ [Either Int Int] -> [[Either Int Int]]
forall a. Eq a => [a] -> [[a]]
group ([Either Int Int] -> [[Either Int Int]])
-> [Either Int Int] -> [[Either Int Int]]
forall a b. (a -> b) -> a -> b
$ (Either Int Int -> Either Int Int -> Ordering)
-> [Either Int Int] -> [Either Int Int]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy Either Int Int -> Either Int Int -> Ordering
forall a. Ord a => Either a a -> Either a a -> Ordering
cmp ([Either Int Int] -> [Either Int Int])
-> [Either Int Int] -> [Either Int Int]
forall a b. (a -> b) -> a -> b
$ (b -> [Either Int Int]) -> [b] -> [Either Int Int]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap
        (\b
x -> [Int -> Either Int Int
forall a b. a -> Either a b
Left (Int -> Either Int Int) -> Int -> Either Int Int
forall a b. (a -> b) -> a -> b
$ b
xb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, Int -> Either Int Int
forall a b. b -> Either a b
Right (Int -> Either Int Int) -> Int -> Either Int Int
forall a b. (a -> b) -> a -> b
$ b
xb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]) [b]
xs
    cmp :: Either a a -> Either a a -> Ordering
cmp (Left a
x) (Left a
y) = a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
x a
y
    cmp (Right a
x) (Right a
y) = a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
x a
y
    cmp (Left a
x) (Right a
y) = case a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
x a
y of
        Ordering
EQ -> Ordering
LT
        Ordering
o -> Ordering
o
    cmp (Right a
x) (Left a
y) = case a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
x a
y of
        Ordering
EQ -> Ordering
GT
        Ordering
o -> Ordering
o
    chr :: ByteString
chr = [b] -> b
forall a. [a] -> a
head [b]
xs b -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^. Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom
{-# INLINE countOverlapped #-}

streamBed :: (MonadResource m, BEDConvert b, MonadIO m)
          => FilePath -> ConduitT i b m () 
streamBed :: [Char] -> ConduitT i b m ()
streamBed [Char]
input = [Char] -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
[Char] -> ConduitT i ByteString m ()
sourceFile [Char]
input ConduitT i ByteString m ()
-> ConduitM ByteString b m () -> ConduitT i b m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString b m ()
forall b (m :: * -> *).
(BEDConvert b, Monad m) =>
ConduitT ByteString b m ()
bsToBed
{-# INLINE streamBed #-}

streamBedGzip :: (BEDConvert b, MonadResource m, MonadThrow m, PrimMonad m)
              => FilePath -> ConduitT i b m () 
streamBedGzip :: [Char] -> ConduitT i b m ()
streamBedGzip [Char]
input = [Char] -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
[Char] -> ConduitT i ByteString m ()
sourceFile [Char]
input ConduitT i ByteString m ()
-> ConduitM ByteString b m () -> ConduitT i b m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString ByteString m ()
-> ConduitT ByteString ByteString m ()
forall (m :: * -> *) a.
Monad m =>
ConduitT ByteString a m () -> ConduitT ByteString a m ()
multiple ConduitT ByteString ByteString m ()
forall (m :: * -> *).
(PrimMonad m, MonadThrow m) =>
ConduitT ByteString ByteString m ()
ungzip ConduitT ByteString ByteString m ()
-> ConduitM ByteString b m () -> ConduitM ByteString b m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString b m ()
forall b (m :: * -> *).
(BEDConvert b, Monad m) =>
ConduitT ByteString b m ()
bsToBed
{-# INLINE streamBedGzip #-}

readBed :: BEDConvert b => FilePath -> IO [b]
readBed :: [Char] -> IO [b]
readBed [Char]
fl = ResourceT IO [b] -> IO [b]
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT (ResourceT IO [b] -> IO [b]) -> ResourceT IO [b] -> IO [b]
forall a b. (a -> b) -> a -> b
$ ConduitT () Void (ResourceT IO) [b] -> ResourceT IO [b]
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void (ResourceT IO) [b] -> ResourceT IO [b])
-> ConduitT () Void (ResourceT IO) [b] -> ResourceT IO [b]
forall a b. (a -> b) -> a -> b
$ [Char] -> ConduitT () b (ResourceT IO) ()
forall (m :: * -> *) b i.
(MonadResource m, BEDConvert b, MonadIO m) =>
[Char] -> ConduitT i b m ()
streamBed [Char]
fl ConduitT () b (ResourceT IO) ()
-> ConduitM b Void (ResourceT IO) [b]
-> ConduitT () Void (ResourceT IO) [b]
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM b Void (ResourceT IO) [b]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
sinkList
{-# INLINE readBed #-}

sinkFileBed :: (BEDConvert b, MonadResource m) => FilePath -> ConduitT b o m ()
sinkFileBed :: [Char] -> ConduitT b o m ()
sinkFileBed [Char]
output = ConduitT b ByteString m ()
forall b (m :: * -> *).
(BEDConvert b, Monad m) =>
ConduitT b ByteString m ()
bedToBS ConduitT b ByteString m ()
-> ConduitM ByteString o m () -> ConduitT b o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| [Char] -> ConduitM ByteString o m ()
forall (m :: * -> *) o.
MonadResource m =>
[Char] -> ConduitT ByteString o m ()
sinkFile [Char]
output
{-# INLINE sinkFileBed #-}

sinkFileBedGzip :: (BEDConvert b, MonadResource m, MonadThrow m, PrimMonad m)
                => FilePath -> ConduitT b o m ()
sinkFileBedGzip :: [Char] -> ConduitT b o m ()
sinkFileBedGzip [Char]
output = ConduitT b ByteString m ()
forall b (m :: * -> *).
(BEDConvert b, Monad m) =>
ConduitT b ByteString m ()
bedToBS ConduitT b ByteString m ()
-> ConduitM ByteString o m () -> ConduitT b o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT ByteString ByteString m ()
forall (m :: * -> *).
(MonadThrow m, PrimMonad m) =>
ConduitT ByteString ByteString m ()
gzip ConduitT ByteString ByteString m ()
-> ConduitM ByteString o m () -> ConduitM ByteString o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| [Char] -> ConduitM ByteString o m ()
forall (m :: * -> *) o.
MonadResource m =>
[Char] -> ConduitT ByteString o m ()
sinkFile [Char]
output
{-# INLINE sinkFileBedGzip #-}

sinkHandleBed :: (BEDConvert b, MonadIO m) => Handle -> ConduitT b o m ()
sinkHandleBed :: Handle -> ConduitT b o m ()
sinkHandleBed Handle
hdl = ConduitT b ByteString m ()
forall b (m :: * -> *).
(BEDConvert b, Monad m) =>
ConduitT b ByteString m ()
bedToBS ConduitT b ByteString m ()
-> ConduitM ByteString o m () -> ConduitT b o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| Handle -> ConduitM ByteString o m ()
forall (m :: * -> *) o.
MonadIO m =>
Handle -> ConduitT ByteString o m ()
sinkHandle Handle
hdl
{-# INLINE sinkHandleBed #-}

writeBed :: BEDConvert b => FilePath -> [b] -> IO ()
writeBed :: [Char] -> [b] -> IO ()
writeBed [Char]
fl [b]
beds = ResourceT IO () -> IO ()
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT (ResourceT IO () -> IO ()) -> ResourceT IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ ConduitT () Void (ResourceT IO) () -> ResourceT IO ()
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void (ResourceT IO) () -> ResourceT IO ())
-> ConduitT () Void (ResourceT IO) () -> ResourceT IO ()
forall a b. (a -> b) -> a -> b
$ [b] -> ConduitT () (Element [b]) (ResourceT IO) ()
forall (m :: * -> *) mono i.
(Monad m, MonoFoldable mono) =>
mono -> ConduitT i (Element mono) m ()
yieldMany [b]
beds ConduitT () b (ResourceT IO) ()
-> ConduitM b Void (ResourceT IO) ()
-> ConduitT () Void (ResourceT IO) ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| [Char] -> ConduitM b Void (ResourceT IO) ()
forall b (m :: * -> *) o.
(BEDConvert b, MonadResource m) =>
[Char] -> ConduitT b o m ()
sinkFileBed [Char]
fl
{-# INLINE writeBed #-}

bedToBS :: (BEDConvert b, Monad m) => ConduitT b B.ByteString m ()
bedToBS :: ConduitT b ByteString m ()
bedToBS = (b -> ByteString) -> ConduitT b ByteString m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC b -> ByteString
forall b. BEDConvert b => b -> ByteString
toLine ConduitT b ByteString m ()
-> ConduitM ByteString ByteString m ()
-> ConduitT b ByteString m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM ByteString ByteString m ()
forall (m :: * -> *) seq.
(Monad m, IsSequence seq, Element seq ~ Word8) =>
ConduitT seq seq m ()
unlinesAsciiC
{-# INLINE bedToBS #-}

bsToBed :: (BEDConvert b, Monad m) => ConduitT B.ByteString b m ()
bsToBed :: ConduitT ByteString b m ()
bsToBed = ConduitT ByteString ByteString m ()
forall (m :: * -> *) seq.
(Monad m, IsSequence seq, Element seq ~ Word8) =>
ConduitT seq seq m ()
linesUnboundedAsciiC ConduitT ByteString ByteString m ()
-> ConduitT ByteString b m () -> ConduitT ByteString b m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| (ByteString -> b) -> ConduitT ByteString b m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC ByteString -> b
forall b. BEDConvert b => ByteString -> b
fromLine
{-# INLINE bsToBed #-}