1 /++ 2 This module contains statistical inference algorithms. 3 4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 5 6 Authors: Ilia Ki, John Michael Hall 7 8 Copyright: 2022 Mir Stat Authors. 9 10 +/ 11 module mir.stat.inference; 12 13 public import mir.stat.descriptive.univariate: KurtosisAlgo, Summation; 14 import mir.internal.utility: isFloatingPoint; 15 16 /++ 17 Tests that a sample comes from a normal distribution. 18 19 Params: 20 kurtosisAlgo = algorithm for calculating skewness and kurtosis (default: KurtosisAlgo.online) 21 summation = algorithm for calculating sums (default: Summation.appropriate) 22 Returns: 23 The kurtosis of the input, must be floating point 24 References: 25 D’Agostino, R. B. (1971), “An omnibus test of normality for moderate and large sample size”, Biometrika, 58, 341-348; 26 D’Agostino, R. and Pearson, E. S. (1973), “Tests for departure from normality”, Biometrika, 60, 613-622 27 +/ 28 template dAgostinoPearsonTest( 29 KurtosisAlgo kurtosisAlgo = KurtosisAlgo.online, 30 Summation summation = Summation.appropriate) 31 { 32 import std.traits: isIterable; 33 34 /++ 35 Params: 36 r = range, must be finite iterable 37 p = null hypothesis probability 38 +/ 39 F dAgostinoPearsonTest(Range, F)(Range r, out F p) 40 if(isFloatingPoint!F && isIterable!Range) 41 { 42 import core.lifetime: move; 43 import mir.stat.descriptive.univariate: KurtosisAccumulator; 44 import mir.stat.distribution.chi2: chi2CCDF; 45 import mir.math.sum: ResolveSummationType; 46 47 KurtosisAccumulator!(F, kurtosisAlgo, ResolveSummationType!(summation, Range, F)) kurtosisAccumulator = r; 48 auto kurtosisStat = kurtosisTestImpl!F(kurtosisAccumulator); 49 50 auto skewnessStat = skewnessTestImpl!F(kurtosisAccumulator); 51 auto stat = skewnessStat * skewnessStat + kurtosisStat * kurtosisStat; 52 p = chi2CCDF(stat, 2); 53 return stat; 54 } 55 } 56 57 /// ditto 58 template dAgostinoPearsonTest(string kurtosisAlgo, string summation = "appropriate") 59 { 60 mixin("alias dAgostinoPearsonTest = .dAgostinoPearsonTest!(KurtosisAlgo." ~ kurtosisAlgo ~ ", Summation." ~ summation ~ ");"); 61 } 62 63 /// 64 unittest 65 { 66 import mir.math.common: approxEqual, pow; 67 import mir.math.sum: Summation; 68 import mir.ndslice.slice: sliced; 69 import mir.test; 70 71 auto x = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 72 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 73 74 double p; 75 x.dAgostinoPearsonTest(p).shouldApprox == 4.151936053369771; 76 p.shouldApprox == 0.12543494432988342; 77 78 p = p.nan; 79 x.dAgostinoPearsonTest!"threePass"(p).shouldApprox == 4.151936053369771; 80 p.shouldApprox == 0.12543494432988342; 81 } 82 83 private F skewnessTestImpl(F, Accumulator)(ref const Accumulator acc) 84 if (isFloatingPoint!F) 85 { 86 import mir.math.common: sqrt, log; 87 auto b2 = acc.skewness!F(true); 88 auto n = acc.count; 89 assert(n > 7, "skewnessTestImpl: count must be larger than seven"); 90 auto y = b2 * sqrt((F(n + 1) * (n + 3)) / (6 * (n - 2))); 91 auto beta2 = 3 * (F(n) * n + 27 * n - 70) * (n + 1) * (n + 3) / (F(n - 2) * (n + 5) * (n + 7) * (n + 9)); 92 auto w2 = -1 + sqrt(2 * (beta2 - 1)); 93 auto delta = 1 / sqrt(0.5f * log(w2)); 94 auto alpha = sqrt(2 / (w2 - 1)); 95 auto y_alpha = y / alpha; 96 return delta * log(y_alpha + sqrt(y_alpha * y_alpha + 1)); 97 } 98 99 version(mir_stat_test) 100 @safe pure nothrow 101 unittest 102 { 103 import mir.stat.descriptive.univariate: SkewnessAccumulator, SkewnessAlgo; 104 import mir.math.common: approxEqual, pow; 105 import mir.math.sum: Summation; 106 import mir.ndslice.slice: sliced; 107 import mir.test; 108 109 auto x = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 110 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 111 SkewnessAccumulator!(double, SkewnessAlgo.naive, Summation.naive) v = x; 112 113 auto zsk = v.skewnessTestImpl!double; 114 zsk.shouldApprox == 1.7985465327962042; 115 import mir.stat.distribution.normal: normalCCDF; 116 auto p = zsk.normalCCDF * 2; 117 p.shouldApprox == 0.07209044155600682; 118 } 119 120 private F kurtosisTestImpl(F, Accumulator)(ref const Accumulator acc) 121 if (isFloatingPoint!F) 122 { 123 import mir.math.common: copysign, sqrt, fabs, pow; 124 auto b2 = acc.kurtosis!F(true, true); 125 auto n = acc.count; 126 assert(n > 7, "kurtosisTestImpl: count must be larger than seven"); 127 auto varb2 = F(24) * n * (F(n - 2) * (n - 3)) / (F(n + 1) * (n + 1) * F((n + 3) * (n + 5))); 128 auto x = (b2 - 3 * (n - 1) / F(n + 1)) / sqrt(varb2); 129 auto beta1sqrt = 6 * (F(n) * n - 5 * n + 2) / (F(n + 7) * (n + 9)) * sqrt((6 * (n + 3) * F(n + 5)) / (n * F(n - 2) * (n - 3))); 130 auto a = 6 + 8 / beta1sqrt * (2 / beta1sqrt + sqrt(1 + 4 / (beta1sqrt * beta1sqrt))); 131 auto t1 = 1 - 2 / (9 * a); 132 auto denom = 1 + x * sqrt(2 / (a - 4)); 133 auto t2 = pow((1 - 2 / a) / denom.fabs, 1 / F(3)).copysign(denom); 134 assert(denom); 135 return (t1 - t2) * sqrt(F(4.5) * a); 136 } 137 138 version(mir_stat_test) 139 @safe pure nothrow 140 unittest 141 { 142 import mir.stat.descriptive.univariate: KurtosisAccumulator; 143 import mir.math.common: approxEqual, pow; 144 import mir.math.sum: Summation; 145 import mir.ndslice.slice: sliced; 146 import mir.test; 147 148 auto x = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 149 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 150 KurtosisAccumulator!(double, KurtosisAlgo.naive, Summation.naive) v = x; 151 152 auto zku = v.kurtosisTestImpl!double; 153 zku.shouldApprox == 0.9576880612895426; 154 import mir.stat.distribution.normal: normalCCDF; 155 auto p = zku.normalCCDF * 2; 156 p.shouldApprox == 0.3382200786902009; 157 }