{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}

module Bio.Utils.Functions (
      ihs
    , ihs'
    , scale
    , filterFDR
    , slideAverage
    , hyperquick
    , gaussianKDE
    , kld
    , jsd
    , binarySearch
    , binarySearchBy
    , binarySearchByBounds
    , quantileNormalization
    , quantileNormalization'
) where

import Data.Bits (shiftR)
import Data.List (foldl', groupBy)
import Data.Function (on)
import Data.Ord (comparing)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Matrix as M
import Statistics.Sample (meanVarianceUnb, mean)
import Statistics.Function (sortBy)
import Control.Parallel.Strategies (parMap, rseq)
import Statistics.Sample.KernelDensity (kde)

-- | inverse hyperbolic sine transformation
ihs :: Double  -- ^ θ, determine the shape of the function
    -> Double  -- ^ input
    -> Double
ihs :: Double -> Double -> Double
ihs !Double
θ !Double
x | Double
θ Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
0 = Double
x
          | Bool
otherwise = Double -> Double
forall a. Floating a => a -> a
log (Double
θ Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double -> Double
forall a. Floating a => a -> a
sqrt (Double
θ Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
θ Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1)) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
θ
{-# INLINE ihs #-}

-- | inverse hyperbolic sine transformation with θ = 1
ihs' :: Double -> Double
ihs' :: Double -> Double
ihs' = Double -> Double -> Double
ihs Double
1

-- | scale data to zero mean and 1 variance
scale :: G.Vector v Double => v Double -> v Double
scale :: v Double -> v Double
scale v Double
xs = (Double -> Double) -> v Double -> v Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map (\Double
x -> (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
m) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double
forall a. Floating a => a -> a
sqrt Double
s) v Double
xs
  where
    (Double
m,Double
s) = v Double -> (Double, Double)
forall (v :: * -> *).
Vector v Double =>
v Double -> (Double, Double)
meanVarianceUnb v Double
xs
{-# INLINE scale #-}

-- | given the p-values, filter data by controling FDR
filterFDR :: G.Vector v (a, Double)
          => Double  -- ^ desired FDR value
          -> v (a, Double)  -- ^ input data and pvalues
          -> v (a, Double)
filterFDR :: Double -> v (a, Double) -> v (a, Double)
filterFDR Double
α v (a, Double)
xs = Int -> v (a, Double) -> v (a, Double)
forall (v :: * -> *) a.
Vector v (a, Double) =>
Int -> v (a, Double) -> v (a, Double)
go Int
n (v (a, Double) -> v (a, Double))
-> (v (a, Double) -> v (a, Double))
-> v (a, Double)
-> v (a, Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Comparison (a, Double) -> v (a, Double) -> v (a, Double)
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (((a, Double) -> Double) -> Comparison (a, Double)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (a, Double) -> Double
forall a b. (a, b) -> b
snd) (v (a, Double) -> v (a, Double)) -> v (a, Double) -> v (a, Double)
forall a b. (a -> b) -> a -> b
$ v (a, Double)
xs
  where
    go :: Int -> v (a, Double) -> v (a, Double)
go Int
rank v (a, Double)
v | Int
rank Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = v (a, Double)
forall (v :: * -> *) a. Vector v a => v a
G.empty
              | (a, Double) -> Double
forall a b. (a, b) -> b
snd (v (a, Double)
v v (a, Double) -> Int -> (a, Double)
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
`G.unsafeIndex` (Int
rankInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)) Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
rank Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
α Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n = Int -> Int -> v (a, Double) -> v (a, Double)
forall (v :: * -> *) a. Vector v a => Int -> Int -> v a -> v a
G.slice Int
0 Int
rank v (a, Double)
v
              | Bool
otherwise = Int -> v (a, Double) -> v (a, Double)
go (Int
rankInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) v (a, Double)
v
    n :: Int
n = v (a, Double) -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v (a, Double)
xs
{-# INLINE filterFDR #-}

-- | Compute the sliding average for each entry in a vector
slideAverage :: (Fractional a, G.Vector v a)
             => Int              -- ^ size of HALF sliding window, 2 means a total
                                 -- window size is 5
             -> v a
             -> v a
slideAverage :: Int -> v a -> v a
slideAverage Int
k v a
xs = Int -> (Int -> a) -> v a
forall (v :: * -> *) a. Vector v a => Int -> (Int -> a) -> v a
G.generate Int
n ((Int -> a) -> v a) -> (Int -> a) -> v a
forall a b. (a -> b) -> a -> b
$ \Int
i -> v a -> Int -> Int -> a
forall a (v :: * -> *).
(Fractional a, Vector v a) =>
v a -> Int -> Int -> a
go v a
xs (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
k)) (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
k))
  where
    go :: v a -> Int -> Int -> a
go v a
v Int
i Int
j = let l :: Int
l = Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
               in (a -> a -> a) -> a -> v a -> a
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
G.foldl' a -> a -> a
forall a. Num a => a -> a -> a
(+) a
0 (Int -> Int -> v a -> v a
forall (v :: * -> *) a. Vector v a => Int -> Int -> v a -> v a
G.unsafeSlice Int
i Int
l v a
v) a -> a -> a
forall a. Fractional a => a -> a -> a
/ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
l
    n :: Int
n = v a -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v a
xs
{-# INLINE slideAverage #-}

hyperquick :: Int -> Int -> Int -> Int -> Double
hyperquick :: Int -> Int -> Int -> Int -> Double
hyperquick Int
x Int
m Int
_n Int
_N = Int -> Double -> Double -> Double -> Double
loop (Int
mInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2) Double
s Double
s (Double
2Double -> Double -> Double
forall a. Num a => a -> a -> a
*Double
e)
  where
    loop :: Int -> Double -> Double -> Double -> Double
loop !Int
k !Double
ak !Double
bk !Double
epsk
        | Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
_N Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Int
_nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
x) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 Bool -> Bool -> Bool
&& Double
epsk Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
e =
            let ck :: Double
ck = Double
ak Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
bk
                k' :: Int
k' = Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
                jjm :: Double
jjm = Int -> Int -> Int -> Int -> Double
forall a a a.
(Fractional a, Integral a, Integral a) =>
a -> a -> a -> a -> a
invJm Int
_n Int
x Int
_N Int
k'
                bk' :: Double
bk' = Double
bk Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
jjm Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1
                ak' :: Double
ak' = Double
ak Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
jjm
                espk' :: Double
espk' = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
_N Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Int
_n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
x) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k') Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
ck Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
ak' Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
bk')
            in Int -> Double -> Double -> Double -> Double
loop Int
k' Double
ak' Double
bk' Double
espk'
        | Bool
otherwise = Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- (Double
ak Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
bk Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
epsk Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2)
    s :: Double
s = (Double -> Int -> Double) -> Double -> [Int] -> Double
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Double
s' Int
k -> Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
s' Double -> Double -> Double
forall a. Num a => a -> a -> a
* Int -> Int -> Int -> Int -> Double
forall a a a.
(Fractional a, Integral a, Integral a) =>
a -> a -> a -> a -> a
invJm Int
_n Int
x Int
_N Int
k) Double
1.0 [Int
x..Int
mInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2]
    invJm :: a -> a -> a -> a -> a
invJm a
_n a
_x a
_N a
_m = ( a
1 a -> a -> a
forall a. Num a => a -> a -> a
- a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
_x a -> a -> a
forall a. Fractional a => a -> a -> a
/ a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
_ma -> a -> a
forall a. Num a => a -> a -> a
+a
1) ) a -> a -> a
forall a. Fractional a => a -> a -> a
/
                          ( a
1 a -> a -> a
forall a. Num a => a -> a -> a
- a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
_na -> a -> a
forall a. Num a => a -> a -> a
-a
1a -> a -> a
forall a. Num a => a -> a -> a
-a
_x) a -> a -> a
forall a. Fractional a => a -> a -> a
/ a -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (a
_Na -> a -> a
forall a. Num a => a -> a -> a
-a
1a -> a -> a
forall a. Num a => a -> a -> a
-a
_m) )
    e :: Double
e = Double
1e-20

-- | Assign weights to the points according to density estimation.
gaussianKDE :: Int  -- ^ number of mesh points used in KDE
            -> U.Vector Double -> (Double -> Double)
gaussianKDE :: Int -> Vector Double -> Double -> Double
gaussianKDE Int
n Vector Double
xs = \Double
x -> 
    let i :: Int
i = Vector Double -> Double -> Int
forall (v :: * -> *) e. (Vector v e, Ord e) => v e -> e -> Int
binarySearch Vector Double
points Double
x
        lo :: Double
lo = Vector Double
points Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        lo_d :: Double
lo_d = Vector Double
den Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) 
        hi :: Double
hi = Vector Double
points Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! Int
i
        hi_d :: Double
hi_d = Vector Double
den Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! Int
i
        hi_w :: Double
hi_w = (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lo) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
hi Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lo)
        lo_w :: Double
lo_w = Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
hi_w
     in Double
lo_w Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
lo_d Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
hi_w Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
hi_d
  where
    (Vector Double
points, Vector Double
den) = Int -> Vector Double -> (Vector Double, Vector Double)
forall (v :: * -> *).
(Vector v CD, Vector v Double, Vector v Int) =>
Int -> v Double -> (v Double, v Double)
kde Int
n Vector Double
xs

-- | compute the Kullback-Leibler divergence between two valid (not check) probability distributions.
-- kl(X,Y) = \sum_i P(x_i) log_2(P(x_i)\/P(y_i)).
kld :: (G.Vector v Double, G.Vector v (Double, Double)) => v Double -> v Double -> Double
kld :: v Double -> v Double -> Double
kld v Double
xs v Double
ys | v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
xs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v Double
ys = [Char] -> Double
forall a. HasCallStack => [Char] -> a
error [Char]
"Incompitable dimensions"
          | Bool
otherwise = (Double -> (Double, Double) -> Double)
-> Double -> v (Double, Double) -> Double
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
G.foldl' Double -> (Double, Double) -> Double
forall a. (Eq a, Floating a) => a -> (a, a) -> a
f Double
0 (v (Double, Double) -> Double)
-> (v Double -> v (Double, Double)) -> v Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. v Double -> v Double -> v (Double, Double)
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v a -> v b -> v (a, b)
G.zip v Double
xs (v Double -> Double) -> v Double -> Double
forall a b. (a -> b) -> a -> b
$ v Double
ys
  where
    f :: a -> (a, a) -> a
f a
acc (a
x, a
y) | a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0 = a
acc
                 | Bool
otherwise = a
acc a -> a -> a
forall a. Num a => a -> a -> a
+ a
x a -> a -> a
forall a. Num a => a -> a -> a
* (a -> a -> a
forall a. Floating a => a -> a -> a
logBase a
2 a
x a -> a -> a
forall a. Num a => a -> a -> a
- a -> a -> a
forall a. Floating a => a -> a -> a
logBase a
2 a
y)
{-# SPECIALIZE kld :: U.Vector Double -> U.Vector Double -> Double #-}
{-# SPECIALIZE kld :: V.Vector Double -> V.Vector Double -> Double #-}

-- | Jensen-Shannon divergence: JS(X,Y) = 1\/2 KL(X,(X+Y)\/2) + 1\/2 KL(Y,(X+Y)\/2).
jsd :: (G.Vector v Double, G.Vector v (Double, Double)) => v Double -> v Double -> Double
jsd :: v Double -> v Double -> Double
jsd v Double
xs v Double
ys = Double
0.5 Double -> Double -> Double
forall a. Num a => a -> a -> a
* v Double -> v Double -> Double
forall (v :: * -> *).
(Vector v Double, Vector v (Double, Double)) =>
v Double -> v Double -> Double
kld v Double
xs v Double
zs Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
0.5 Double -> Double -> Double
forall a. Num a => a -> a -> a
* v Double -> v Double -> Double
forall (v :: * -> *).
(Vector v Double, Vector v (Double, Double)) =>
v Double -> v Double -> Double
kld v Double
ys v Double
zs
  where zs :: v Double
zs = (Double -> Double -> Double) -> v Double -> v Double -> v Double
forall (v :: * -> *) a b c.
(Vector v a, Vector v b, Vector v c) =>
(a -> b -> c) -> v a -> v b -> v c
G.zipWith (\Double
x Double
y -> (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
y) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2) v Double
xs v Double
ys
{-# SPECIALIZE jsd :: U.Vector Double -> U.Vector Double -> Double #-}
{-# SPECIALIZE jsd :: V.Vector Double -> V.Vector Double -> Double #-}

-- | O(log n). return the position of the first element that is >= query
binarySearch :: (G.Vector v e, Ord e)
             => v e -> e -> Int
binarySearch :: v e -> e -> Int
binarySearch v e
vec e
e = (e -> e -> Ordering) -> v e -> e -> Int -> Int -> Int
forall (v :: * -> *) e a.
Vector v e =>
(e -> a -> Ordering) -> v e -> a -> Int -> Int -> Int
binarySearchByBounds e -> e -> Ordering
forall a. Ord a => a -> a -> Ordering
compare v e
vec e
e Int
0 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ v e -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v e
vec Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
{-# INLINE binarySearch #-}

binarySearchBy :: G.Vector v e
               => (e -> a -> Ordering) -> v e -> a -> Int
binarySearchBy :: (e -> a -> Ordering) -> v e -> a -> Int
binarySearchBy e -> a -> Ordering
cmp v e
vec a
e = (e -> a -> Ordering) -> v e -> a -> Int -> Int -> Int
forall (v :: * -> *) e a.
Vector v e =>
(e -> a -> Ordering) -> v e -> a -> Int -> Int -> Int
binarySearchByBounds e -> a -> Ordering
cmp v e
vec a
e Int
0 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ v e -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v e
vec Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
{-# INLINE binarySearchBy #-}

binarySearchByBounds :: G.Vector v e
                     => (e -> a -> Ordering) -> v e -> a -> Int -> Int -> Int
binarySearchByBounds :: (e -> a -> Ordering) -> v e -> a -> Int -> Int -> Int
binarySearchByBounds e -> a -> Ordering
cmp v e
vec a
e = Int -> Int -> Int
loop
  where
    loop :: Int -> Int -> Int
loop !Int
l !Int
u
        | Int
u Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l = Int
l
        | Bool
otherwise = case e -> a -> Ordering
cmp (v e
vec v e -> Int -> e
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
G.! Int
k) a
e of
                        Ordering
LT -> Int -> Int -> Int
loop (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
u
                        Ordering
EQ -> Int -> Int -> Int
loop Int
l (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
                        Ordering
GT -> Int -> Int -> Int
loop Int
l (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
      where k :: Int
k = Int
u Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
1
{-# INLINE binarySearchByBounds #-}

{-
empiricalCDF :: G.Vector v Double => v Double -> v Double
empiricalCDF xs = runST $ do
    let n = G.length xs
        indices = groupBy ( (==) `on` ((xs G.!).snd) ) $ zip [1.0..] $ sortBy (compare `on` (xs G.!)) [0..n-1]
        updates mv (v,ys) = mapM_ (flip (GM.unsafeWrite mv) v.snd) ys
    xs' <- G.thaw xs
    mapM_ (updates xs'. ((flip (/) (fromIntegral n).fst.last) &&& id)) indices
    G.unsafeFreeze xs'
{-# INLINE empiricalCDF #-}

cdf :: G.Vector v Double => v Double -> v Double
cdf xs = let f = empiricalCDF xs
             n = fromIntegral $ G.length xs
             δ = 1 / (4 * (n**0.25) * sqrt (pi * log n))
         in G.map (\ x -> case () of
             _ | x < δ -> δ
               | x > 1 - δ -> 1 - δ
               | otherwise -> x) f
{-# INLINE cdf #-}
-}

-- | Columns are samples, rows are features / genes.
quantileNormalization :: M.Matrix Double -> M.Matrix Double
quantileNormalization :: Matrix Double -> Matrix Double
quantileNormalization Matrix Double
mat = [Vector Double] -> Matrix Double
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ([Vector Double] -> Matrix Double)
-> [Vector Double] -> Matrix Double
forall a b. (a -> b) -> a -> b
$ (Vector (Int, Double) -> Vector Double)
-> [Vector (Int, Double)] -> [Vector Double]
forall a b. (a -> b) -> [a] -> [b]
map
    ((Vector Double, Vector Int) -> Vector Double
forall a b. (a, b) -> a
fst ((Vector Double, Vector Int) -> Vector Double)
-> (Vector (Int, Double) -> (Vector Double, Vector Int))
-> Vector (Int, Double)
-> Vector Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Double, Int) -> (Vector Double, Vector Int)
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v (a, b) -> (v a, v b)
G.unzip (Vector (Double, Int) -> (Vector Double, Vector Int))
-> (Vector (Int, Double) -> Vector (Double, Int))
-> Vector (Int, Double)
-> (Vector Double, Vector Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Comparison (Double, Int)
-> Vector (Double, Int) -> Vector (Double, Int)
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (((Double, Int) -> Int) -> Comparison (Double, Int)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Double, Int) -> Int
forall a b. (a, b) -> b
snd) (Vector (Double, Int) -> Vector (Double, Int))
-> (Vector (Int, Double) -> Vector (Double, Int))
-> Vector (Int, Double)
-> Vector (Double, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Double, Int)] -> Vector (Double, Int)
forall (v :: * -> *) a. Vector v a => [a] -> v a
G.fromList ([(Double, Int)] -> Vector (Double, Int))
-> (Vector (Int, Double) -> [(Double, Int)])
-> Vector (Int, Double)
-> Vector (Double, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([(Double, (Int, Double))] -> [(Double, Int)])
-> [[(Double, (Int, Double))]] -> [(Double, Int)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap [(Double, (Int, Double))] -> [(Double, Int)]
forall b b. [(Double, (b, b))] -> [(Double, b)]
f ([[(Double, (Int, Double))]] -> [(Double, Int)])
-> (Vector (Int, Double) -> [[(Double, (Int, Double))]])
-> Vector (Int, Double)
-> [(Double, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
    ((Double, (Int, Double)) -> (Double, (Int, Double)) -> Bool)
-> [(Double, (Int, Double))] -> [[(Double, (Int, Double))]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
(==) (Double -> Double -> Bool)
-> ((Double, (Int, Double)) -> Double)
-> (Double, (Int, Double))
-> (Double, (Int, Double))
-> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` ((Int, Double) -> Double
forall a b. (a, b) -> b
snd ((Int, Double) -> Double)
-> ((Double, (Int, Double)) -> (Int, Double))
-> (Double, (Int, Double))
-> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double, (Int, Double)) -> (Int, Double)
forall a b. (a, b) -> b
snd)) ([(Double, (Int, Double))] -> [[(Double, (Int, Double))]])
-> (Vector (Int, Double) -> [(Double, (Int, Double))])
-> Vector (Int, Double)
-> [[(Double, (Int, Double))]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Double] -> [(Int, Double)] -> [(Double, (Int, Double))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Double]
averages ([(Int, Double)] -> [(Double, (Int, Double))])
-> (Vector (Int, Double) -> [(Int, Double)])
-> Vector (Int, Double)
-> [(Double, (Int, Double))]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Int, Double) -> [(Int, Double)]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList) ([Vector (Int, Double)] -> [Vector Double])
-> [Vector (Int, Double)] -> [Vector Double]
forall a b. (a -> b) -> a -> b
$
    Matrix (Int, Double) -> [Vector (Int, Double)]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns Matrix (Int, Double)
srtMat
  where
    f :: [(Double, (b, b))] -> [(Double, b)]
f [(Double
a,(b
b,b
_))] = [(Double
a,b
b)]
    f [(Double, (b, b))]
xs = let m :: Double
m = Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean (Vector Double -> Double) -> Vector Double -> Double
forall a b. (a -> b) -> a -> b
$ [Double] -> Vector Double
forall a. Unbox a => [a] -> Vector a
U.fromList ([Double] -> Vector Double) -> [Double] -> Vector Double
forall a b. (a -> b) -> a -> b
$ ([Double], [(b, b)]) -> [Double]
forall a b. (a, b) -> a
fst (([Double], [(b, b)]) -> [Double])
-> ([Double], [(b, b)]) -> [Double]
forall a b. (a -> b) -> a -> b
$ [(Double, (b, b))] -> ([Double], [(b, b)])
forall a b. [(a, b)] -> ([a], [b])
unzip [(Double, (b, b))]
xs
           in ((Double, (b, b)) -> (Double, b))
-> [(Double, (b, b))] -> [(Double, b)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Double
_,(b
i,b
_)) -> (Double
m, b
i)) [(Double, (b, b))]
xs
    srtMat :: M.Matrix (Int, Double)
    srtMat :: Matrix (Int, Double)
srtMat = [Vector (Int, Double)] -> Matrix (Int, Double)
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ([Vector (Int, Double)] -> Matrix (Int, Double))
-> [Vector (Int, Double)] -> Matrix (Int, Double)
forall a b. (a -> b) -> a -> b
$ (Vector Double -> Vector (Int, Double))
-> [Vector Double] -> [Vector (Int, Double)]
forall a b. (a -> b) -> [a] -> [b]
map (Comparison (Int, Double)
-> Vector (Int, Double) -> Vector (Int, Double)
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (((Int, Double) -> Double) -> Comparison (Int, Double)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int, Double) -> Double
forall a b. (a, b) -> b
snd) (Vector (Int, Double) -> Vector (Int, Double))
-> (Vector Double -> Vector (Int, Double))
-> Vector Double
-> Vector (Int, Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> Vector Double -> Vector (Int, Double)
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v a -> v b -> v (a, b)
G.zip (Int -> Int -> Vector Int
forall (v :: * -> *) a. (Vector v a, Num a) => a -> Int -> v a
G.enumFromN Int
0 Int
n)) ([Vector Double] -> [Vector (Int, Double)])
-> [Vector Double] -> [Vector (Int, Double)]
forall a b. (a -> b) -> a -> b
$
        Matrix Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns Matrix Double
mat
    averages :: [Double]
averages = (Vector (Int, Double) -> Double)
-> [Vector (Int, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean (Vector Double -> Double)
-> (Vector (Int, Double) -> Vector Double)
-> Vector (Int, Double)
-> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Vector Int, Vector Double) -> Vector Double
forall a b. (a, b) -> b
snd ((Vector Int, Vector Double) -> Vector Double)
-> (Vector (Int, Double) -> (Vector Int, Vector Double))
-> Vector (Int, Double)
-> Vector Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Int, Double) -> (Vector Int, Vector Double)
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v (a, b) -> (v a, v b)
G.unzip) ([Vector (Int, Double)] -> [Double])
-> [Vector (Int, Double)] -> [Double]
forall a b. (a -> b) -> a -> b
$ Matrix (Int, Double) -> [Vector (Int, Double)]
forall a. Context a => Matrix a -> [Vector a]
M.toRows Matrix (Int, Double)
srtMat
    n :: Int
n = Matrix Double -> Int
forall a. Context a => Matrix a -> Int
M.rows Matrix Double
mat
{-# INLINE quantileNormalization #-}

-- | Columns are samples, rows are features / genes.
quantileNormalization' :: M.Matrix Double -> M.Matrix Double
quantileNormalization' :: Matrix Double -> Matrix Double
quantileNormalization' Matrix Double
mat = [Vector Double] -> Matrix Double
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ([Vector Double] -> Matrix Double)
-> [Vector Double] -> Matrix Double
forall a b. (a -> b) -> a -> b
$ Strategy (Vector Double)
-> (Vector (Int, Double) -> Vector Double)
-> [Vector (Int, Double)]
-> [Vector Double]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy (Vector Double)
forall a. Strategy a
rseq
    ((Vector Double, Vector Int) -> Vector Double
forall a b. (a, b) -> a
fst ((Vector Double, Vector Int) -> Vector Double)
-> (Vector (Int, Double) -> (Vector Double, Vector Int))
-> Vector (Int, Double)
-> Vector Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Double, Int) -> (Vector Double, Vector Int)
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v (a, b) -> (v a, v b)
G.unzip (Vector (Double, Int) -> (Vector Double, Vector Int))
-> (Vector (Int, Double) -> Vector (Double, Int))
-> Vector (Int, Double)
-> (Vector Double, Vector Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Comparison (Double, Int)
-> Vector (Double, Int) -> Vector (Double, Int)
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (((Double, Int) -> Int) -> Comparison (Double, Int)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Double, Int) -> Int
forall a b. (a, b) -> b
snd) (Vector (Double, Int) -> Vector (Double, Int))
-> (Vector (Int, Double) -> Vector (Double, Int))
-> Vector (Int, Double)
-> Vector (Double, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Double, Int)] -> Vector (Double, Int)
forall (v :: * -> *) a. Vector v a => [a] -> v a
G.fromList ([(Double, Int)] -> Vector (Double, Int))
-> (Vector (Int, Double) -> [(Double, Int)])
-> Vector (Int, Double)
-> Vector (Double, Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([(Double, (Int, Double))] -> [(Double, Int)])
-> [[(Double, (Int, Double))]] -> [(Double, Int)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap [(Double, (Int, Double))] -> [(Double, Int)]
forall b b. [(Double, (b, b))] -> [(Double, b)]
f ([[(Double, (Int, Double))]] -> [(Double, Int)])
-> (Vector (Int, Double) -> [[(Double, (Int, Double))]])
-> Vector (Int, Double)
-> [(Double, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
    ((Double, (Int, Double)) -> (Double, (Int, Double)) -> Bool)
-> [(Double, (Int, Double))] -> [[(Double, (Int, Double))]]
forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
(==) (Double -> Double -> Bool)
-> ((Double, (Int, Double)) -> Double)
-> (Double, (Int, Double))
-> (Double, (Int, Double))
-> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` ((Int, Double) -> Double
forall a b. (a, b) -> b
snd ((Int, Double) -> Double)
-> ((Double, (Int, Double)) -> (Int, Double))
-> (Double, (Int, Double))
-> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double, (Int, Double)) -> (Int, Double)
forall a b. (a, b) -> b
snd)) ([(Double, (Int, Double))] -> [[(Double, (Int, Double))]])
-> (Vector (Int, Double) -> [(Double, (Int, Double))])
-> Vector (Int, Double)
-> [[(Double, (Int, Double))]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Double] -> [(Int, Double)] -> [(Double, (Int, Double))]
forall a b. [a] -> [b] -> [(a, b)]
zip [Double]
averages ([(Int, Double)] -> [(Double, (Int, Double))])
-> (Vector (Int, Double) -> [(Int, Double)])
-> Vector (Int, Double)
-> [(Double, (Int, Double))]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Int, Double) -> [(Int, Double)]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList) ([Vector (Int, Double)] -> [Vector Double])
-> [Vector (Int, Double)] -> [Vector Double]
forall a b. (a -> b) -> a -> b
$
    Matrix (Int, Double) -> [Vector (Int, Double)]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns Matrix (Int, Double)
srtMat
  where
    f :: [(Double, (b, b))] -> [(Double, b)]
f [(Double
a,(b
b,b
_))] = [(Double
a,b
b)]
    f [(Double, (b, b))]
xs = let m :: Double
m = Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean (Vector Double -> Double) -> Vector Double -> Double
forall a b. (a -> b) -> a -> b
$ [Double] -> Vector Double
forall a. Unbox a => [a] -> Vector a
U.fromList ([Double] -> Vector Double) -> [Double] -> Vector Double
forall a b. (a -> b) -> a -> b
$ ([Double], [(b, b)]) -> [Double]
forall a b. (a, b) -> a
fst (([Double], [(b, b)]) -> [Double])
-> ([Double], [(b, b)]) -> [Double]
forall a b. (a -> b) -> a -> b
$ [(Double, (b, b))] -> ([Double], [(b, b)])
forall a b. [(a, b)] -> ([a], [b])
unzip [(Double, (b, b))]
xs
           in ((Double, (b, b)) -> (Double, b))
-> [(Double, (b, b))] -> [(Double, b)]
forall a b. (a -> b) -> [a] -> [b]
map (\(Double
_,(b
i,b
_)) -> (Double
m, b
i)) [(Double, (b, b))]
xs
    srtMat :: M.Matrix (Int, Double)
    srtMat :: Matrix (Int, Double)
srtMat = [Vector (Int, Double)] -> Matrix (Int, Double)
forall a. Context a => [Vector a] -> Matrix a
M.fromColumns ([Vector (Int, Double)] -> Matrix (Int, Double))
-> [Vector (Int, Double)] -> Matrix (Int, Double)
forall a b. (a -> b) -> a -> b
$ Strategy (Vector (Int, Double))
-> (Vector Double -> Vector (Int, Double))
-> [Vector Double]
-> [Vector (Int, Double)]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy (Vector (Int, Double))
forall a. Strategy a
rseq (Comparison (Int, Double)
-> Vector (Int, Double) -> Vector (Int, Double)
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (((Int, Double) -> Double) -> Comparison (Int, Double)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int, Double) -> Double
forall a b. (a, b) -> b
snd) (Vector (Int, Double) -> Vector (Int, Double))
-> (Vector Double -> Vector (Int, Double))
-> Vector Double
-> Vector (Int, Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> Vector Double -> Vector (Int, Double)
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v a -> v b -> v (a, b)
G.zip (Int -> Int -> Vector Int
forall (v :: * -> *) a. (Vector v a, Num a) => a -> Int -> v a
G.enumFromN Int
0 Int
n)) ([Vector Double] -> [Vector (Int, Double)])
-> [Vector Double] -> [Vector (Int, Double)]
forall a b. (a -> b) -> a -> b
$
        Matrix Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
M.toColumns Matrix Double
mat
    averages :: [Double]
averages = Strategy Double
-> (Vector (Int, Double) -> Double)
-> [Vector (Int, Double)]
-> [Double]
forall b a. Strategy b -> (a -> b) -> [a] -> [b]
parMap Strategy Double
forall a. Strategy a
rseq (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean (Vector Double -> Double)
-> (Vector (Int, Double) -> Vector Double)
-> Vector (Int, Double)
-> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Vector Int, Vector Double) -> Vector Double
forall a b. (a, b) -> b
snd ((Vector Int, Vector Double) -> Vector Double)
-> (Vector (Int, Double) -> (Vector Int, Vector Double))
-> Vector (Int, Double)
-> Vector Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Int, Double) -> (Vector Int, Vector Double)
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v (a, b) -> (v a, v b)
G.unzip) ([Vector (Int, Double)] -> [Double])
-> [Vector (Int, Double)] -> [Double]
forall a b. (a -> b) -> a -> b
$ Matrix (Int, Double) -> [Vector (Int, Double)]
forall a. Context a => Matrix a -> [Vector a]
M.toRows Matrix (Int, Double)
srtMat
    n :: Int
n = Matrix Double -> Int
forall a. Context a => Matrix a -> Int
M.rows Matrix Double
mat
{-# INLINE quantileNormalization' #-}