{-# LANGUAGE CPP              #-}
{-# LANGUAGE FlexibleContexts #-}
{-|
Module      : Statistics.Test.Levene
Description : Levene's test for homogeneity of variances.
Copyright   : (c) Praneya Kumar, Alexey Khudyakov, 2025
License     : BSD-3-Clause

Levene's test used to check whether samples have equal variance. Null
hypothesis is all samples are from distributions with same variance
(homoscedacity). Test is robust to non-normality, and versatile with
mean or median centering.

>>> import qualified Data.Vector.Unboxed as VU
>>> import Statistics.Test.Levene
>>> :{
let a = VU.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
    b = VU.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
    c = VU.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
in levenesTest Median [a, b, c]
:}
Right (Test {testSignificance = mkPValue 2.4315059672496814e-3, testStatistics = 7.584952754501659, testDistribution = fDistributionReal 2.0 27.0})
-}
module Statistics.Test.Levene (
    Center(..),
    levenesTest
) where

import Control.Monad
import qualified Data.Vector           as V
import qualified Data.Vector.Unboxed   as VU
import qualified Data.Vector.Generic   as VG
import qualified Data.Vector.Storable  as VS
import qualified Data.Vector.Primitive as VP
#if MIN_VERSION_vector(0,13,2)
import qualified Data.Vector.Strict    as VV
#endif
import Statistics.Distribution (complCumulative)
import Statistics.Distribution.FDistribution (fDistribution, FDistribution)
import Statistics.Types      (mkPValue)
import Statistics.Test.Types (Test(..))
import Statistics.Function   (gsort)
import Statistics.Sample     (mean)

import qualified Statistics.Sample.Internal as IS
import Statistics.Quantile


-- | Center calculation method
data Center
  = Mean             -- ^ Use arithmetic mean
  | Median           -- ^ Use median
  | Trimmed !Double  -- ^ Trimmed mean with given proportion to cut from each end
  deriving (Center -> Center -> Bool
(Center -> Center -> Bool)
-> (Center -> Center -> Bool) -> Eq Center
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Center -> Center -> Bool
== :: Center -> Center -> Bool
$c/= :: Center -> Center -> Bool
/= :: Center -> Center -> Bool
Eq, Int -> Center -> ShowS
[Center] -> ShowS
Center -> String
(Int -> Center -> ShowS)
-> (Center -> String) -> ([Center] -> ShowS) -> Show Center
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Center -> ShowS
showsPrec :: Int -> Center -> ShowS
$cshow :: Center -> String
show :: Center -> String
$cshowList :: [Center] -> ShowS
showList :: [Center] -> ShowS
Show)

-- | Main Levene's test function with full error handling
levenesTest
  :: (VG.Vector v Double)
  => Center      -- ^ Centering method
  -> [v Double]  -- ^ Input samples
  -> Either String (Test FDistribution)
{-# INLINABLE levenesTest #-}
levenesTest :: forall (v :: * -> *).
Vector v Double =>
Center -> [v Double] -> Either String (Test FDistribution)
levenesTest Center
center [v Double]
samples
  | [v Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [v Double]
samples Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2 = String -> Either String (Test FDistribution)
forall a b. a -> Either a b
Left String
"At least two samples required"
  -- NOTE: We don't have nice way of computing mean of a list!
  | Bool
otherwise = do
      let residuals :: [Residuals]
residuals = Center -> v Double -> Residuals
forall (v :: * -> *).
Vector v Double =>
Center -> v Double -> Residuals
computeResiduals Center
center (v Double -> Residuals) -> [v Double] -> [Residuals]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [v Double]
samples
      -- Average of all Z
      let n_tot :: Int
n_tot = [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Vector Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length (Vector Double -> Int)
-> (Residuals -> Vector Double) -> Residuals -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Residuals -> Vector Double
vecZ (Residuals -> Int) -> [Residuals] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Residuals]
residuals -- Total number of samples
      let zbar :: Double
zbar = [Double] -> Double
forall (f :: * -> *). Foldable f => f Double -> Double
IS.sumF [ Residuals -> Double
meanZ Residuals
z Double -> Double -> Double
forall a. Num a => a -> a -> a
* Residuals -> Double
sampleN Residuals
z
                         | Residuals
z <- [Residuals]
residuals]
               Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n_tot
      -- Numerator: Sum over (ni * (Z[i] - Z)^2)
      let numerator :: Double
numerator = [Double] -> Double
forall (f :: * -> *). Foldable f => f Double -> Double
IS.sumF [ Residuals -> Double
sampleN Residuals
z Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
sqr (Residuals -> Double
meanZ Residuals
z Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
zbar)
                              | Residuals
z <- [Residuals]
residuals]
      -- Denominator: Sum over Σ((dev_ij - zbari)^2)
      let denominator :: Double
denominator = [Double] -> Double
forall (f :: * -> *). Foldable f => f Double -> Double
IS.sumF
            [ Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
IS.sum (Vector Double -> Double) -> Vector Double -> Double
forall a b. (a -> b) -> a -> b
$ (Double -> Double) -> Vector Double -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map (Double -> Double
sqr (Double -> Double) -> (Double -> Double) -> Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Double -> Double
forall a. Num a => a -> a -> a
subtract (Residuals -> Double
meanZ Residuals
z)) (Residuals -> Vector Double
vecZ Residuals
z)
            | Residuals
z <- [Residuals]
residuals
            ]
      -- Handle division by zero and invalid values
      Bool -> Either String () -> Either String ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Double
denominator Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
0 Bool -> Bool -> Bool
|| Double -> Bool
forall a. RealFloat a => a -> Bool
isNaN Double
denominator Bool -> Bool -> Bool
|| Double -> Bool
forall a. RealFloat a => a -> Bool
isInfinite Double
denominator)
        (Either String () -> Either String ())
-> Either String () -> Either String ()
forall a b. (a -> b) -> a -> b
$ String -> Either String ()
forall a b. a -> Either a b
Left String
"Invalid denominator in W-statistic calculation"
      let wStat :: Double
wStat = (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n_tot Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
numerator Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
denominator)
          fDist :: FDistribution
fDist = Int -> Int -> FDistribution
fDistribution (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Int
n_tot Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k)
      Test FDistribution -> Either String (Test FDistribution)
forall a b. b -> Either a b
Right Test { testStatistics :: Double
testStatistics   = Double
wStat
                 , testSignificance :: PValue Double
testSignificance = Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ FDistribution -> Double -> Double
forall d. Distribution d => d -> Double -> Double
complCumulative FDistribution
fDist Double
wStat
                 , testDistribution :: FDistribution
testDistribution = FDistribution
fDist
                 }
  where
    k :: Int
k = [v Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [v Double]
samples -- Number of groups
{-# SPECIALIZE levenesTest :: Center -> [V.Vector  Double] -> Either String (Test FDistribution) #-}
{-# SPECIALIZE levenesTest :: Center -> [VU.Vector Double] -> Either String (Test FDistribution) #-}
{-# SPECIALIZE levenesTest :: Center -> [VS.Vector Double] -> Either String (Test FDistribution) #-}
{-# SPECIALIZE levenesTest :: Center -> [VP.Vector Double] -> Either String (Test FDistribution) #-}
#if MIN_VERSION_vector(0,13,2)
{-# SPECIALIZE levenesTest :: Center -> [VV.Vector Double] -> Either String (Test FDistribution) #-}
#endif

----------------------------------------------------------------
-- Implementation
----------------------------------------------------------------

-- | Trim data from both ends with error handling and performance optimization
trimboth :: (Ord a, Fractional a, VG.Vector v a)
         => v a
         -> Double
         -> v a
{-# INLINE trimboth #-}
trimboth :: forall a (v :: * -> *).
(Ord a, Fractional a, Vector v a) =>
v a -> Double -> v a
trimboth v a
vec Double
p
  | Double
p Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0 Bool -> Bool -> Bool
|| Double
p Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
0.5 = String -> v a
forall a. HasCallStack => String -> a
error String
"Statistics.Test.Levene: trimming: proportion must be between 0 and 0.5"
  | v a -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
VG.null v a
vec       = v a
vec
  | Bool
otherwise         = Int -> Int -> v a -> v a
forall (v :: * -> *) a.
(HasCallStack, Vector v a) =>
Int -> Int -> v a -> v a
VG.slice Int
lowerCut (Int
upperCut Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
lowerCut) v a
sorted
  where
    n :: Int
n        = v a -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length v a
vec
    sorted :: v a
sorted   = v a -> v a
forall e (v :: * -> *). (Ord e, Vector v e) => v e -> v e
gsort v a
vec
    lowerCut :: Int
lowerCut = Double -> Int
forall b. Integral b => Double -> b
forall a b. (RealFrac a, Integral b) => a -> b
ceiling (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
p Double -> Double -> Double
forall a. Num a => a -> a -> a
* Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n
    upperCut :: Int
upperCut = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
lowerCut

data Residuals = Residuals
  { Residuals -> Double
sampleN :: !Double
  , Residuals -> Double
meanZ   :: !Double
  , Residuals -> Vector Double
vecZ    :: !(VU.Vector Double)
  }

computeResiduals
  :: VG.Vector v Double
  => Center
  -> v Double
  -> Residuals
{-# INLINE computeResiduals #-}
computeResiduals :: forall (v :: * -> *).
Vector v Double =>
Center -> v Double -> Residuals
computeResiduals Center
method v Double
xs = case Center
method of
  Center
Mean   ->
    let c :: Double
c  = v Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean v Double
xs
        zs :: Vector Double
zs = (Double -> Double) -> Vector Double -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map (\Double
x -> Double -> Double
forall a. Num a => a -> a
abs (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
c)) (Vector Double -> Vector Double) -> Vector Double -> Vector Double
forall a b. (a -> b) -> a -> b
$ v Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert v Double
xs
    in Vector Double -> Residuals
makeR Vector Double
zs
  Center
Median ->
    let c :: Double
c  = ContParam -> v Double -> Double
forall (v :: * -> *).
Vector v Double =>
ContParam -> v Double -> Double
median ContParam
medianUnbiased v Double
xs
        zs :: Vector Double
zs = (Double -> Double) -> Vector Double -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map (\Double
x -> Double -> Double
forall a. Num a => a -> a
abs (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
c)) (Vector Double -> Vector Double) -> Vector Double -> Vector Double
forall a b. (a -> b) -> a -> b
$ v Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert v Double
xs
    in Vector Double -> Residuals
makeR Vector Double
zs
  Trimmed Double
p ->
    let trimmed :: v Double
trimmed = v Double -> Double -> v Double
forall a (v :: * -> *).
(Ord a, Fractional a, Vector v a) =>
v a -> Double -> v a
trimboth v Double
xs Double
p
        c :: Double
c       = v Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean v Double
trimmed
        zs :: Vector Double
zs      = (Double -> Double) -> Vector Double -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
VU.map (\Double
x -> Double -> Double
forall a. Num a => a -> a
abs (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
c)) (Vector Double -> Vector Double) -> Vector Double -> Vector Double
forall a b. (a -> b) -> a -> b
$ v Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
VU.convert v Double
trimmed
    in Vector Double -> Residuals
makeR Vector Double
zs
  where
    makeR :: Vector Double -> Residuals
makeR Vector Double
zs = Residuals { sampleN :: Double
sampleN = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ Vector Double -> Int
forall a. Unbox a => Vector a -> Int
VU.length Vector Double
zs
                         , meanZ :: Double
meanZ   = Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector Double
zs
                         , vecZ :: Vector Double
vecZ    = Vector Double
zs
                         }

sqr :: Double -> Double
sqr :: Double -> Double
sqr Double
x = Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
x