module HNum.Stats where
import           HNum.Vector
import           HNum.CSV
type Coeff a = (a, a)
fac :: Integral a => a -> a
fac 0 = 1
fac 1 = 1
fac n = product [1 .. n]
facStop :: Integral a => a -> a -> a
facStop n s = product [s .. n]
p :: Integral a => a -> a -> a
n `p` r = facStop n (n  r + 1)
c :: Integral a => a -> a -> a
n `c` r = (n `p` r) `div` fac r
class VecOps v => Statistical v where
  
  mean :: Fractional a => v a -> a
  
  cov' :: Floating a => v a -> v a -> a
  
  cov :: Floating a => v a -> v a -> Matrix a
  
  var :: Floating a => v a -> a
  
  std :: Floating a => v a -> a
  
  se :: Floating a => v a -> a
  
  cor :: Floating a => v a -> v a -> a
  
  med :: (Ord a, Floating a) => v a -> a
  
  mode :: Eq a => v a -> a
  
  cv :: Floating a => v a -> a
  
  moment :: Floating a => a -> v a-> a
  
  skew :: Floating a => v a -> a
  
  skew' :: Floating a => v a -> a
  
  kurt :: Floating a => v a -> a
instance Statistical Vector where
  
  mean x = sum x / fromIntegral (length x)
  
  cov' x y
    | length x <= 1 || length y <= 1 = error "Samples are not enough"
    | length x /= length y = error "Length is not same"
    | otherwise = ((x .- mean x) .*. (y .- mean y)) / fromIntegral (length x  1)
  
  cov x y = matrix [[var x, cov' x y], [cov' y x, var y]]
  
  var v = cov' v v
  
  std = sqrt . var
  
  se x = std x / sqrt (fromIntegral (length x))
  
  cor x y = cov' x y / (std x * std y)
  
  med x | even l    = ((qs !! (l'1)) + (qs !! l')) / 2
        | otherwise = qs !! l'
    where l  = length x
          l' = l `div` 2
          qs = (toList . qsort) x
  
  mode x = v !! n
    where v  = toList x
          cx = map (`count` v) v
          m  = maximum cx
          n  = head $ dropWhile (\p -> cx !! p /= m) [0..]
  
  cv x = std x / mean x
  
  moment n x = sum ((x .- mean x) .^ n)
  
  skew x = (1 / fromIntegral l) * moment 3 x / std x ^ 3
    where l = length x
  skew' x = (fromIntegral l^2 / fromIntegral ((l1) * (l2))) * skew x
    where l = length x
  
  kurt x = moment 4 x / (fromIntegral l * std x ** 4)  3
    where l = length x
summary :: (Show a, Floating a) => DataFrame a -> IO ()
summary df = do
  putStrLn $ "Mean: " ++ show hm
  putStrLn $ "Var:  " ++ show hv
  putStrLn $ "Std:  " ++ show hs
 where
  h  = header df
  m  = matForm $ dat df
  ms = map (mean . vector) m
  vs = map (var . vector) m
  ss = map (std . vector) m
  hm = zip h ms
  hv = zip h vs
  hs = zip h ss
describe :: (Show a, Floating a, Ord a) => Vector a -> IO ()
describe v = do
  putStrLn $ "n:    " ++ show (length v)
  putStrLn $ "mean: " ++ show (mean v)
  putStrLn $ "std:  " ++ show (std v)
  putStrLn $ "med:  " ++ show (med v)
  putStrLn $ "mode: " ++ show (mode v)
  putStrLn $ "min:  " ++ show (minimum v)
  putStrLn $ "max:  " ++ show (maximum v)
  putStrLn $ "skew: " ++ show (skew v)
  putStrLn $ "kurt: " ++ show (kurt v)
  putStrLn $ "SE:   " ++ show (se v)
lm :: Floating a => Vector a -> Vector a -> Coeff a
lm x y = (my  b1 * mx, b1)
 where
  mx = mean x
  my = mean y
  b1 = (x .- mx) .*. (y .- my) / ((x .- mx) .*. (x .- mx))
lineFit :: Floating a => Coeff a -> Vector a -> Vector a
lineFit (n, m) x = x .* m .+ n
rss :: Floating a => Vector a -> Vector a -> a
rss x y = sum ((y  lineFit (lm x y) x) .^ 2)
rse :: Floating a => Vector a -> Vector a -> a
rse x y = sqrt (1 / fromIntegral (length x  2) * rss x y)
sma :: Fractional a => Int -> Vector a -> Vector a
sma p v = vec $ take (p  1) v' ++ sma' p v'
 where
  v' = toList v
  sma' :: Fractional a => Int -> [a] -> [a]
  sma' p x
    | length x < p
    = []
    | otherwise
    = let m = sum (take p x) / fromIntegral p in m : sma' p (tail x)
count :: Eq a => a -> [a] -> Int
count p v = length (filter (== p) v)