The OpenD Programming Language

1 /++
2 This module contains statistical inference algorithms.
3 
4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
5 
6 Authors: Ilia Ki, John Michael Hall
7 
8 Copyright: 2022 Mir Stat Authors.
9 
10 +/
11 module mir.stat.inference;
12 
13 public import mir.stat.descriptive.univariate: KurtosisAlgo, Summation;
14 import mir.internal.utility: isFloatingPoint;
15 
16 /++
17 Tests that a sample comes from a normal distribution.
18 
19 Params:
20     kurtosisAlgo = algorithm for calculating skewness and kurtosis (default: KurtosisAlgo.online)
21     summation = algorithm for calculating sums (default: Summation.appropriate)
22 Returns:
23     The kurtosis of the input, must be floating point
24 References:
25     D’Agostino, R. B. (1971), “An omnibus test of normality for moderate and large sample size”, Biometrika, 58, 341-348;
26     D’Agostino, R. and Pearson, E. S. (1973), “Tests for departure from normality”, Biometrika, 60, 613-622
27 +/
28 template dAgostinoPearsonTest(
29     KurtosisAlgo kurtosisAlgo = KurtosisAlgo.online, 
30     Summation summation = Summation.appropriate)
31 {
32     import std.traits: isIterable;
33 
34     /++
35     Params:
36         r = range, must be finite iterable
37         p = null hypothesis probability
38     +/
39     F dAgostinoPearsonTest(Range, F)(Range r, out F p)
40         if(isFloatingPoint!F && isIterable!Range)
41     {
42         import core.lifetime: move;
43         import mir.stat.descriptive.univariate: KurtosisAccumulator;
44         import mir.stat.distribution.chi2: chi2CCDF;
45         import mir.math.sum: ResolveSummationType;
46 
47         KurtosisAccumulator!(F, kurtosisAlgo, ResolveSummationType!(summation, Range, F)) kurtosisAccumulator = r;
48         auto kurtosisStat = kurtosisTestImpl!F(kurtosisAccumulator);
49 
50         auto skewnessStat = skewnessTestImpl!F(kurtosisAccumulator);
51         auto stat = skewnessStat * skewnessStat + kurtosisStat * kurtosisStat;
52         p = chi2CCDF(stat, 2);
53         return stat;
54     }
55 }
56 
57 /// ditto
58 template dAgostinoPearsonTest(string kurtosisAlgo, string summation = "appropriate")
59 {
60     mixin("alias dAgostinoPearsonTest = .dAgostinoPearsonTest!(KurtosisAlgo." ~ kurtosisAlgo ~ ", Summation." ~ summation ~ ");");
61 }
62 
63 ///
64 unittest
65 {
66     import mir.math.common: approxEqual, pow;
67     import mir.math.sum: Summation;
68     import mir.ndslice.slice: sliced;
69     import mir.test;
70 
71     auto x = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25,
72               2.0, 7.5, 5.0, 1.0, 1.5, 0.0];
73 
74     double p;
75     x.dAgostinoPearsonTest(p).shouldApprox == 4.151936053369771;
76     p.shouldApprox == 0.12543494432988342;
77 
78     p = p.nan;
79     x.dAgostinoPearsonTest!"threePass"(p).shouldApprox == 4.151936053369771;
80     p.shouldApprox == 0.12543494432988342;
81 }
82 
83 private F skewnessTestImpl(F, Accumulator)(ref const Accumulator acc)
84     if (isFloatingPoint!F)
85 {
86     import mir.math.common: sqrt, log;
87     auto b2 = acc.skewness!F(true);
88     auto n = acc.count;
89     assert(n > 7, "skewnessTestImpl: count must be larger than seven");
90     auto y = b2 * sqrt((F(n + 1) * (n + 3)) / (6 * (n - 2)));
91     auto beta2 = 3 * (F(n) * n + 27 * n - 70) * (n + 1) * (n + 3) / (F(n - 2) * (n + 5) * (n + 7) * (n + 9));
92     auto w2 = -1 + sqrt(2 * (beta2 - 1));
93     auto delta = 1 / sqrt(0.5f * log(w2));
94     auto alpha = sqrt(2 / (w2 - 1));
95     auto y_alpha = y / alpha;
96     return delta * log(y_alpha + sqrt(y_alpha * y_alpha + 1));
97 }
98 
99 version(mir_stat_test)
100 @safe pure nothrow
101 unittest
102 {
103     import mir.stat.descriptive.univariate: SkewnessAccumulator, SkewnessAlgo;
104     import mir.math.common: approxEqual, pow;
105     import mir.math.sum: Summation;
106     import mir.ndslice.slice: sliced;
107     import mir.test;
108 
109     auto x = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25,
110               2.0, 7.5, 5.0, 1.0, 1.5, 0.0];
111     SkewnessAccumulator!(double, SkewnessAlgo.naive, Summation.naive) v = x;
112 
113     auto zsk = v.skewnessTestImpl!double;
114     zsk.shouldApprox == 1.7985465327962042;
115     import mir.stat.distribution.normal: normalCCDF;
116     auto p = zsk.normalCCDF * 2;
117     p.shouldApprox == 0.07209044155600682;
118 }
119 
120 private F kurtosisTestImpl(F, Accumulator)(ref const Accumulator acc)
121     if (isFloatingPoint!F)
122 {
123     import mir.math.common: copysign, sqrt, fabs, pow;
124     auto b2 = acc.kurtosis!F(true, true);
125     auto n = acc.count;
126     assert(n > 7, "kurtosisTestImpl: count must be larger than seven");
127     auto varb2 = F(24) * n * (F(n - 2) * (n - 3)) / (F(n + 1) * (n + 1) * F((n + 3) * (n + 5)));
128     auto x = (b2 - 3 * (n - 1) / F(n + 1)) / sqrt(varb2);
129     auto beta1sqrt = 6 * (F(n) * n - 5 * n + 2) / (F(n + 7) * (n + 9)) * sqrt((6 * (n + 3) * F(n + 5)) / (n * F(n - 2) * (n - 3)));
130     auto a = 6 + 8 / beta1sqrt * (2 / beta1sqrt + sqrt(1 + 4 / (beta1sqrt * beta1sqrt)));
131     auto t1 = 1 - 2 / (9 * a);
132     auto denom = 1 + x * sqrt(2 / (a - 4));
133     auto t2 = pow((1 - 2 / a) / denom.fabs, 1 / F(3)).copysign(denom);
134     assert(denom);
135     return (t1 - t2) * sqrt(F(4.5) * a);
136 }
137 
138 version(mir_stat_test)
139 @safe pure nothrow
140 unittest
141 {
142     import mir.stat.descriptive.univariate: KurtosisAccumulator;
143     import mir.math.common: approxEqual, pow;
144     import mir.math.sum: Summation;
145     import mir.ndslice.slice: sliced;
146     import mir.test;
147 
148     auto x = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25,
149               2.0, 7.5, 5.0, 1.0, 1.5, 0.0];
150     KurtosisAccumulator!(double, KurtosisAlgo.naive, Summation.naive) v = x;
151 
152     auto zku = v.kurtosisTestImpl!double;
153     zku.shouldApprox == 0.9576880612895426;
154     import mir.stat.distribution.normal: normalCCDF;
155     auto p = zku.normalCCDF * 2;
156     p.shouldApprox == 0.3382200786902009;
157 }