{-# LANGUAGE CPP              #-}
{-# LANGUAGE FlexibleContexts #-}
{-|
Module      : Statistics.Test.Bartlett
Description : Bartlett's test for homogeneity of variances.
Copyright   : (c) Praneya Kumar, Alexey Khudyakov, 2025
License     : BSD-3-Clause

Bartlett's test is used to check that multiple groups of observations
come from distributions with equal variances. This test assumes that
samples come from normal distribution. If this is not the case it may
simple test for non-normality and Levene's ("Statistics.Test.Levene")
is preferred

>>> import qualified Data.Vector.Unboxed as VU
>>> import Statistics.Test.Bartlett
>>> :{
let a = VU.fromList [8.88, 9.12, 9.04, 8.98, 9.00, 9.08, 9.01, 8.85, 9.06, 8.99]
    b = VU.fromList [8.88, 8.95, 9.29, 9.44, 9.15, 9.58, 8.36, 9.18, 8.67, 9.05]
    c = VU.fromList [8.95, 9.12, 8.95, 8.85, 9.03, 8.84, 9.07, 8.98, 8.86, 8.98]
in bartlettTest [a,b,c]
:}
Right (Test {testSignificance = mkPValue 1.1254782518843598e-5, testStatistics = 22.789434813726768, testDistribution = chiSquared 2})

-}
module Statistics.Test.Bartlett (
    bartlettTest,
    module Statistics.Distribution.ChiSquared
) where

import qualified Data.Vector           as V
import qualified Data.Vector.Unboxed   as VU
import qualified Data.Vector.Generic   as VG
import qualified Data.Vector.Storable  as VS
import qualified Data.Vector.Primitive as VP
#if MIN_VERSION_vector(0,13,2)
import qualified Data.Vector.Strict    as VV
#endif

import Statistics.Distribution (complCumulative)
import Statistics.Distribution.ChiSquared (chiSquared, ChiSquared(..))
import Statistics.Sample (varianceUnbiased)
import Statistics.Types (mkPValue)
import Statistics.Test.Types (Test(..))

-- | Perform Bartlett's test for equal variances. The input is a list
--   of vectors, where each vector represents a group of observations.
bartlettTest :: VG.Vector v Double => [v Double] -> Either String (Test ChiSquared)
bartlettTest :: forall (v :: * -> *).
Vector v Double =>
[v Double] -> Either String (Test ChiSquared)
bartlettTest [v Double]
groups
  | [v Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [v Double]
groups Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2                 = String -> Either String (Test ChiSquared)
forall a b. a -> Either a b
Left String
"At least two groups are required for Bartlett's test."
  | (v Double -> Bool) -> [v Double] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2) (Int -> Bool) -> (v Double -> Int) -> v Double -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length) [v Double]
groups    = String -> Either String (Test ChiSquared)
forall a b. a -> Either a b
Left String
"Each group must have at least two observations."
  | (Var -> Bool) -> [Var] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any ((Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
0) (Double -> Bool) -> (Var -> Double) -> Var -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Var -> Double
var) [Var]
groupVariances = String -> Either String (Test ChiSquared)
forall a b. a -> Either a b
Left String
"All groups must have positive variance."
  | Bool
otherwise = Test ChiSquared -> Either String (Test ChiSquared)
forall a b. b -> Either a b
Right Test
      { testSignificance :: PValue Double
testSignificance = PValue Double
pValue
      , testStatistics :: Double
testStatistics   = Double
tStatistic
      , testDistribution :: ChiSquared
testDistribution = ChiSquared
chiDist
      }
  where
    -- Number of groups
    k :: Int
k = [v Double] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [v Double]
groups
    -- Sample sizes for each group
    ni :: [Double]
ni  = (v Double -> Double) -> [v Double] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> (v Double -> Int) -> v Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length) [v Double]
groups
    -- Total number of observations across all groups
    n_tot :: Int
n_tot = [Int] -> Int
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int) -> (v Double -> Int) -> v Double -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length (v Double -> Int) -> [v Double] -> [Int]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [v Double]
groups
    -- Variance estimates
    groupVariances :: [Var]
groupVariances = v Double -> Var
forall (v :: * -> *). Vector v Double => v Double -> Var
toVar (v Double -> Var) -> [v Double] -> [Var]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [v Double]
groups
    sumWeightedVars :: Double
sumWeightedVars = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ (Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
v | Var{sampleN :: Var -> Double
sampleN=Double
n, var :: Var -> Double
var=Double
v} <- [Var]
groupVariances ]
    pooledVariance :: Double
pooledVariance  = Double
sumWeightedVars Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n_tot Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k)
    -- Numerator of Bartlett's statistic
    numerator :: Double
numerator =
      Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n_tot Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
forall a. Floating a => a -> a
log Double
pooledVariance Double -> Double -> Double
forall a. Num a => a -> a -> a
-
      [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ (Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
forall a. Floating a => a -> a
log Double
v | Var{sampleN :: Var -> Double
sampleN=Double
n, var :: Var -> Double
var=Double
v} <- [Var]
groupVariances ]
    -- Denominator correction term
    sumReciprocals :: Double
sumReciprocals = [Double] -> Double
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double
1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) | Double
n <- [Double]
ni]
    denomCorrection :: Double
denomCorrection =
      Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ (Double
sumReciprocals Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
n_tot Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k)) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
3 Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
k Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1))

    -- Test statistic and test distrubution
    tStatistic :: Double
tStatistic = Double -> Double -> Double
forall a. Ord a => a -> a -> a
max Double
0 (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
numerator Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
denomCorrection
    chiDist :: ChiSquared
chiDist    = Int -> ChiSquared
chiSquared (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
    pValue :: PValue Double
pValue     = Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ ChiSquared -> Double -> Double
forall d. Distribution d => d -> Double -> Double
complCumulative ChiSquared
chiDist Double
tStatistic
{-# SPECIALIZE bartlettTest :: [V.Vector  Double] -> Either String (Test ChiSquared) #-}
{-# SPECIALIZE bartlettTest :: [VU.Vector Double] -> Either String (Test ChiSquared) #-}
{-# SPECIALIZE bartlettTest :: [VS.Vector Double] -> Either String (Test ChiSquared) #-}
{-# SPECIALIZE bartlettTest :: [VP.Vector Double] -> Either String (Test ChiSquared) #-}
#if MIN_VERSION_vector(0,13,2)
{-# SPECIALIZE bartlettTest :: [VV.Vector Double] -> Either String (Test ChiSquared) #-}
#endif

-- Estimate of variance
data Var = Var
  { Var -> Double
sampleN :: !Double -- ^ N of elements
  , Var -> Double
var     :: !Double -- ^ Sample variance
  }

toVar :: VG.Vector v Double => v Double -> Var
toVar :: forall (v :: * -> *). Vector v Double => v Double -> Var
toVar v Double
xs = Var { sampleN :: Double
sampleN = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ v Double -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
VG.length v Double
xs
               , var :: Double
var     = v Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
varianceUnbiased v Double
xs
               }