mir.stat.distribution.hypergeometric source code

1 /++
2 This module contains algorithms for the $(LINK2 https://en.wikipedia.org/wiki/Hypergeometric_distribution, Hypergeometric Distribution).
3 
4 There are multiple alternative parameterizations of the Hypergeometric Distribution.
5 The formulation in this module measures the number of draws (`k`) with a
6 specific feature in `n` total draws without replacement from a population of
7 size `N` such that `K` of these have the feature of interest.
8 
9 Hypergeometric distribution functions can utilize different algorithms. The 
10 default is `HypergeometricAlgo.direct`, which can be more time-consuming for
11 large values of the parameters. Additional algorithms are provided to the user
12 to choose the trade-off between running time and accuracy.
13 
14 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
15 
16 Authors: John Michael Hall
17 
18 Copyright: 2023 Mir Stat Authors.
19 
20 +/
21 
22 module mir.stat.distribution.hypergeometric;
23 
24 import mir.bignum.fp: Fp;
25 import mir.internal.utility: isFloatingPoint;
26 
27 /++
28 Algorithms used to calculate hypergeometric distribution.
29 
30 `HypergeometricAlgo.direct` can be more time-consuming for large values of the
31 parameters. Additional algorithms are provided to the user to choose the
32 trade-off between running time and accuracy.
33 +/
34 enum HypergeometricAlgo {
35     /++
36     Direct
37     +/
38     direct,
39     /++
40     Approximates hypergeometric distribution with binomial distribution.
41     +/
42     approxBinomial,
43     /++
44     Approximates hypergeometric distribution with poisson distribution (uses gamma approximation, except for inverse CDF).
45     +/
46     approxPoisson,
47     /++
48     Approximates hypergeometric distribution with normal distribution.
49     +/
50     approxNormal,
51     /++
52     Approximates hypergeometric distribution with normal distribution (including continuity correction).
53     +/
54     approxNormalContinuityCorrection,
55 }
56 
57 private
58 @safe pure @nogc nothrow
59 T hypergeometricPMFImpl(T, HypergeometricAlgo hypergeometricAlgo)
60         (const size_t k, const size_t N, const size_t K, const size_t n)
61     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.direct)
62     in (K <= N, "K must be less than or equal to N")
63     in (n <= N, "n must be less than or equal to N")
64     in (k <= n, "n - k must be greater than or equal to zero")
65     in (k <= K, "k must be less than or equal to K")
66     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
67 {
68     import mir.combinatorics: binomial;
69     return cast(T) (binomial(K, k) * binomial(N - K, n - k)) / binomial(N, n);
70 }
71 
72 private
73 @safe pure @nogc nothrow
74 T hypergeometricPMFImpl(T, HypergeometricAlgo hypergeometricAlgo)
75         (const size_t k, const size_t N, const size_t K, const size_t n)
76     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxBinomial)
77     in (K <= N, "K must be less than or equal to N")
78     in (n <= N, "n must be less than or equal to N")
79     in (k <= n, "n - k must be greater than or equal to zero")
80     in (k <= K, "k must be less than or equal to K")
81     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
82 {
83     import mir.stat.distribution.binomial: binomialPMF;
84 
85     return binomialPMF(k, n, cast(T) K / N);
86 }
87 
88 private
89 @safe pure @nogc nothrow
90 T hypergeometricPMFImpl(T, HypergeometricAlgo hypergeometricAlgo)
91         (const size_t k, const size_t N, const size_t K, const size_t n)
92     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxPoisson)
93     in (K <= N, "K must be less than or equal to N")
94     in (n <= N, "n must be less than or equal to N")
95     in (k <= n, "n - k must be greater than or equal to zero")
96     in (k <= K, "k must be less than or equal to K")
97     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
98 {
99     import mir.stat.distribution.poisson: poissonPMF, PoissonAlgo;
100 
101     return poissonPMF!(PoissonAlgo.gamma)(k, cast(T) n * K / N);
102 }
103 
104 private
105 @safe pure @nogc nothrow
106 T hypergeometricPMFImpl(T, HypergeometricAlgo hypergeometricAlgo)
107         (const size_t k, const size_t N, const size_t K, const size_t n)
108     if (isFloatingPoint!T && 
109         hypergeometricAlgo == HypergeometricAlgo.approxNormal)
110     in (K <= N, "K must be less than or equal to N")
111     in (n <= N, "n must be less than or equal to N")
112     in (k <= n, "n - k must be greater than or equal to zero")
113     in (k <= K, "k must be less than or equal to K")
114     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
115 {
116     import mir.math.common: sqrt;
117     import mir.stat.distribution.normal: normalPDF;
118 
119     T l = k;
120     return normalPDF(l, cast(T) (n * K) / N, sqrt(n * (cast(T) K / N) * (cast(T) (N - K) / N) * (cast(T) (N - n) / (N - 1))));
121 }
122 
123 private
124 @safe pure @nogc nothrow
125 T hypergeometricPMFImpl(T, HypergeometricAlgo hypergeometricAlgo)
126         (const size_t k, const size_t N, const size_t K, const size_t n)
127     if (isFloatingPoint!T && 
128         hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection)
129     in (K <= N, "K must be less than or equal to N")
130     in (n <= N, "n must be less than or equal to N")
131     in (k <= n, "n - k must be greater than or equal to zero")
132     in (k <= K, "k must be less than or equal to K")
133     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
134 {
135     import mir.math.common: sqrt;
136     import mir.stat.distribution.normal: normalCDF;
137 
138     T location = cast(T) (n * K) / N;
139     T scale = sqrt(n * (cast(T) K / N) * (cast(T) (N - K) / N) * (cast(T) (N - n) / (N - 1)));
140     return normalCDF(k + 0.5, location, scale) - normalCDF(k - 0.5, location, scale);
141 }
142 
143 /++
144 Computes the hypergeometric probability mass function (PMF).
145 
146 Additional algorithms may be provided for calculating PMF that allow trading off
147 time and accuracy. If `approxPoisson` is provided, `PoissonAlgo.gamma` is assumed.
148 
149 Params:
150     k = value to evaluate PMF (e.g. number of correct draws of object of interest)
151     N = total population size
152     K = number of relevant objects in population
153     n = number of draws
154 
155 See_also: $(LINK2 https://en.wikipedia.org/wiki/Hypergeometric_distribution, Hypergeometric Distribution)
156 +/
157 @safe pure @nogc nothrow
158 T hypergeometricPMF(T, HypergeometricAlgo hypergeometricAlgo = HypergeometricAlgo.direct)
159         (const size_t k, const size_t N, const size_t K, const size_t n)
160     if (isFloatingPoint!T)
161     in (K <= N, "K must be less than or equal to N")
162     in (n <= N, "n must be less than or equal to N")
163     in (k <= n, "n - k must be greater than or equal to zero")
164     in (k <= K, "k must be less than or equal to K")
165     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
166 {
167     if (n == 0)
168         return 1;
169     return hypergeometricPMFImpl!(T, hypergeometricAlgo)(k, N, K, n);
170 }
171 
172 /// ditto
173 template hypergeometricPMF(HypergeometricAlgo hypergeometricAlgo = HypergeometricAlgo.direct)
174 {
175     alias hypergeometricPMF = hypergeometricPMF!(double, hypergeometricAlgo);
176 }
177 
178 /// ditto
179 @safe pure nothrow @nogc
180 template hypergeometricPMF(T, string hypergeometricAlgo)
181 {
182     mixin("alias hypergeometricPMF = .hypergeometricPMF!(T, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
183 }
184 
185 /// ditto
186 @safe pure nothrow @nogc
187 template hypergeometricPMF(string hypergeometricAlgo)
188 {
189     mixin("alias hypergeometricPMF = .hypergeometricPMF!(double, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
190 }
191 
192 ///
193 @safe pure @nogc nothrow
194 version(mir_stat_test)
195 unittest
196 {
197     import mir.test: shouldApprox;
198 
199     0.hypergeometricPMF(7, 4, 3).shouldApprox == 0.02857143;
200     1.hypergeometricPMF(7, 4, 3).shouldApprox == 0.3428571;
201     2.hypergeometricPMF(7, 4, 3).shouldApprox == 0.5142857;
202     3.hypergeometricPMF(7, 4, 3).shouldApprox == 0.1142857;
203 
204     // can also provide a template argument to change output type
205     static assert(is(typeof(hypergeometricPMF!float(3, 7, 4, 3)) == float));
206 }
207 
208 // Check n=0 condition
209 @safe pure @nogc nothrow
210 version(mir_stat_test)
211 unittest
212 {
213     import mir.test: shouldApprox;
214 
215     0.hypergeometricPMF(7, 4, 0).shouldApprox == 1.0;
216 }
217 
218 /// Alternate algorithms
219 @safe pure @nogc nothrow
220 version(mir_stat_test)
221 unittest
222 {
223     import mir.test: shouldApprox;
224     import mir.math.common: exp;
225 
226     // Can approximate hypergeometric with binomial distribution
227     20.hypergeometricPMF!"approxBinomial"(750_000, 250_000, 50).shouldApprox == exp(hypergeometricLPMF(20, 750_000, 250_000, 50));
228     // Can approximate hypergeometric with poisson distribution
229     20.hypergeometricPMF!"approxPoisson"(100_000, 100, 5_000).shouldApprox == exp(hypergeometricLPMF(20, 100_000, 100, 5_000));
230     // Can approximate hypergeometric with normal distribution
231     3_500.hypergeometricPMF!"approxNormal"(10_000, 7_500, 5_000).shouldApprox == exp(hypergeometricLPMF(3_500, 10_000, 7_500, 5_000));
232     // Can approximate hypergeometric with normal distribution (with continuity correction)
233     3_500.hypergeometricPMF!"approxNormalContinuityCorrection"(10_000, 7_500, 5_000).shouldApprox == exp(hypergeometricLPMF(3_500, 10_000, 7_500, 5_000));
234 }
235 
236 /++
237 Computes the hypergeometric probability mass function (PMF) with extended 
238 floating point types (e.g. `Fp!128`), which provides additional accuracy for
239 large values of `k`, `N`, `K`, or `n`. 
240 
241 Params:
242     k = value to evaluate PMF (e.g. number of correct draws of object of interest)
243     N = total population size
244     K = number of relevant objects in population
245     n = number of draws
246 
247 See_also: $(LINK2 https://en.wikipedia.org/wiki/Hypergeometric_distribution, Hypergeometric Distribution)
248 +/
249 @safe pure @nogc nothrow
250 T fp_hypergeometricPMF(T = Fp!128)(const size_t k, const size_t N, const size_t K, const size_t n)
251     if (is(T == Fp!size, size_t size))
252     in (K <= N, "K must be less than or equal to N")
253     in (n <= N, "n must be less than or equal to N")
254     in (k <= n, "n - k must be greater than or equal to zero")
255     in (k <= K, "k must be less than or equal to K")
256     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
257 {
258     import mir.math.numeric: binomialCoefficient;
259     return binomialCoefficient(K, cast(const uint) k) * 
260            binomialCoefficient(N - K, cast(const uint) (n - k)) / 
261            binomialCoefficient(N, cast(const uint) n);
262 }
263 
264 ///
265 @safe pure @nogc nothrow
266 version(mir_stat_test)
267 unittest
268 {
269     import mir.bignum.fp: Fp, fp_log;
270     import mir.test: shouldApprox;
271 
272     enum size_t val = 1_000_000;
273     size_t N = val + 5;
274     size_t K = val / 2;
275     size_t n = val / 100;
276     0.fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(0, N, K, n);
277     1.fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(1, N, K, n);
278     2.fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(2, N, K, n);
279     5.fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(5, N, K, n);
280     (n / 2).fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(n / 2, N, K, n);
281     (n - 5).fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(n - 5, N, K, n);
282     (n - 2).fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(n - 2, N, K, n);
283     (n - 1).fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(n - 1, N, K, n);
284     n.fp_hypergeometricPMF(N, K, n).fp_log!double.shouldApprox == hypergeometricLPMF(n, N, K, n);
285 }
286 
287 private
288 @safe pure @nogc nothrow
289 T hypergeometricCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
290         (const size_t k, const size_t N, const size_t K, const size_t n)
291     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.direct)
292     in (K <= N, "K must be less than or equal to N")
293     in (n <= N, "n must be less than or equal to N")
294     in (k <= n, "n - k must be greater than or equal to zero")
295     in (k <= K, "k must be less than or equal to K")
296     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
297 {
298     import mir.combinatorics: binomial;
299 
300     T result = 0;
301     const denominator = binomial(N, n);
302     if (k <= n / 2) {
303         foreach (size_t i; 0 .. (k + 1)) {
304             result += cast(T) (binomial(K, i) * binomial(N - K, n - i)) / denominator;
305         }
306     } else {
307         result = 1 - hypergeometricCCDFImpl!(T, hypergeometricAlgo)(k, N, K, n);
308     }
309     return result;
310 }
311 
312 private
313 @safe pure @nogc nothrow
314 T hypergeometricCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
315         (const size_t k, const size_t N, const size_t K, const size_t n)
316     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxBinomial)
317     in (K <= N, "K must be less than or equal to N")
318     in (n <= N, "n must be less than or equal to N")
319     in (k <= n, "n - k must be greater than or equal to zero")
320     in (k <= K, "k must be less than or equal to K")
321     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
322 {
323     import mir.stat.distribution.binomial: binomialCDF;
324 
325     return binomialCDF(k, n, cast(T) K / N);
326 }
327 
328 private
329 @safe pure @nogc nothrow
330 T hypergeometricCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
331         (const size_t k, const size_t N, const size_t K, const size_t n)
332     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxPoisson)
333     in (K <= N, "K must be less than or equal to N")
334     in (n <= N, "n must be less than or equal to N")
335     in (k <= n, "n - k must be greater than or equal to zero")
336     in (k <= K, "k must be less than or equal to K")
337     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
338 {
339     import mir.stat.distribution.poisson: poissonCDF, PoissonAlgo;
340 
341     return poissonCDF!(PoissonAlgo.gamma)(k, cast(T) n * K / N);
342 }
343 
344 private
345 @safe pure @nogc nothrow
346 T hypergeometricCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
347         (const size_t k, const size_t N, const size_t K, const size_t n)
348     if (isFloatingPoint!T && 
349         (hypergeometricAlgo == HypergeometricAlgo.approxNormal || 
350          hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection))
351     in (K <= N, "K must be less than or equal to N")
352     in (n <= N, "n must be less than or equal to N")
353     in (k <= n, "n - k must be greater than or equal to zero")
354     in (k <= K, "k must be less than or equal to K")
355     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
356 {
357     import mir.math.common: sqrt;
358     import mir.stat.distribution.normal: normalCDF;
359 
360     T l = k;
361     static if (hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection) {
362         l += 0.5;
363     }
364     return normalCDF(l, cast(T) (n * K) / N, sqrt(n * (cast(T) K / N) * (cast(T) (N - K) / N) * (cast(T) (N - n) / (N - 1))));
365 }
366 
367 /++
368 Computes the hypergeometric cumulative distribution function (CDF).
369 
370 Additional algorithms may be provided for calculating CDF that allow trading off
371 time and accuracy. If `approxPoisson` is provided, `PoissonAlgo.gamma` is assumed.
372 
373 Setting `hypergeometricAlgo = HypergeometricAlgo.direct` results in direct
374 summation being used, which can result in significant slowdowns for large values
375 of `k`. 
376 
377 Params:
378     k = value to evaluate CDF (e.g. number of correct draws of object of interest)
379     N = total population size
380     K = number of relevant objects in population
381     n = number of draws
382 
383 See_also: $(LINK2 https://en.wikipedia.org/wiki/Hypergeometric_distribution, Hypergeometric Distribution)
384 +/
385 @safe pure @nogc nothrow
386 T hypergeometricCDF(T, HypergeometricAlgo hypergeometricAlgo = HypergeometricAlgo.direct)
387         (const size_t k, const size_t N, const size_t K, const size_t n)
388     if (isFloatingPoint!T)
389     in (K <= N, "K must be less than or equal to N")
390     in (n <= N, "n must be less than or equal to N")
391     in (k <= n, "n - k must be greater than or equal to zero")
392     in (k <= K, "k must be less than or equal to K")
393     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
394 {
395     if (k == n || n == 0)
396         return 1;
397     return hypergeometricCDFImpl!(T, hypergeometricAlgo)(k, N, K, n);
398 }
399 
400 /// ditto
401 template hypergeometricCDF(HypergeometricAlgo hypergeometricAlgo = HypergeometricAlgo.direct)
402 {
403     alias hypergeometricCDF = hypergeometricCDF!(double, hypergeometricAlgo);
404 }
405 
406 /// ditto
407 @safe pure nothrow @nogc
408 template hypergeometricCDF(T, string hypergeometricAlgo)
409 {
410     mixin("alias hypergeometricCDF = .hypergeometricCDF!(T, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
411 }
412 
413 /// ditto
414 @safe pure nothrow @nogc
415 template hypergeometricCDF(string hypergeometricAlgo)
416 {
417     mixin("alias hypergeometricCDF = .hypergeometricCDF!(double, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
418 }
419 
420 ///
421 @safe pure @nogc nothrow
422 version(mir_stat_test)
423 unittest
424 {
425     import mir.test: shouldApprox;
426 
427     0.hypergeometricCDF(7, 4, 3).shouldApprox == 0.02857143;
428     1.hypergeometricCDF(7, 4, 3).shouldApprox == 0.3714286;
429     2.hypergeometricCDF(7, 4, 3).shouldApprox == 0.8857143;
430     3.hypergeometricCDF(7, 4, 3).shouldApprox == 1.0;
431 
432     // can also provide a template argument to change output type
433     static assert(is(typeof(hypergeometricCDF!float(3, 7, 4, 3)) == float));
434 }
435 
436 // Check n=0 condition
437 @safe pure @nogc nothrow
438 version(mir_stat_test)
439 unittest
440 {
441     import mir.test: shouldApprox;
442 
443     0.hypergeometricCDF(7, 4, 0).shouldApprox == 1.0;
444 }
445 
446 /// Alternate algorithms
447 @safe pure @nogc nothrow
448 version(mir_stat_test)
449 unittest
450 {
451     import mir.test: shouldApprox;
452 
453     // Can approximate hypergeometric with binomial distribution
454     20.hypergeometricCDF!"approxBinomial"(750_000, 250_000, 50).shouldApprox(1e-2) == 0.8740839;
455     // Can approximate hypergeometric with poisson distribution
456     8.hypergeometricCDF!"approxPoisson"(100_000, 100, 5_000).shouldApprox(1e-2) == 0.9370063;
457     // Can approximate hypergeometric with normal distribution
458     3_750.hypergeometricCDF!"approxNormal"(10_000, 7_500, 5_000).shouldApprox(2e-2) == 0.5092122;
459     // Can approximate hypergeometric with normal distribution
460     3_750.hypergeometricCDF!"approxNormalContinuityCorrection"(10_000, 7_500, 5_000).shouldApprox(1e-2) == 0.5092122;
461 }
462 
463 private
464 @safe pure @nogc nothrow
465 T hypergeometricCCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
466         (const size_t k, const size_t N, const size_t K, const size_t n)
467     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.direct)
468     in (K <= N, "K must be less than or equal to N")
469     in (n <= N, "n must be less than or equal to N")
470     in (k <= n, "n - k must be greater than or equal to zero")
471     in (k <= K, "k must be less than or equal to K")
472     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
473 {
474     import mir.combinatorics: binomial;
475 
476     T result = 0;
477     const denominator = binomial(N, n);
478     if (k > n / 2) {
479         foreach (size_t i; (k + 1) .. (n + 1)) {
480             result += cast(T) (binomial(K, i) * binomial(N - K, n - i)) / denominator;
481         }
482     } else {
483         result = 1 - hypergeometricCDFImpl!(T, hypergeometricAlgo)(k, N, K, n);
484     }
485     return result;
486 }
487 
488 private
489 @safe pure @nogc nothrow
490 T hypergeometricCCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
491         (const size_t k, const size_t N, const size_t K, const size_t n)
492     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxBinomial)
493     in (K <= N, "K must be less than or equal to N")
494     in (n <= N, "n must be less than or equal to N")
495     in (k <= n, "n - k must be greater than or equal to zero")
496     in (k <= K, "k must be less than or equal to K")
497     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
498 {
499     import mir.stat.distribution.binomial: binomialCCDF;
500 
501     return binomialCCDF(k, n, cast(T) K / N);
502 }
503 
504 private
505 @safe pure @nogc nothrow
506 T hypergeometricCCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
507         (const size_t k, const size_t N, const size_t K, const size_t n)
508     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxPoisson)
509     in (K <= N, "K must be less than or equal to N")
510     in (n <= N, "n must be less than or equal to N")
511     in (k <= n, "n - k must be greater than or equal to zero")
512     in (k <= K, "k must be less than or equal to K")
513     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
514 {
515     import mir.stat.distribution.poisson: poissonCCDF, PoissonAlgo;
516 
517     return poissonCCDF!(PoissonAlgo.gamma)(k, cast(T) n * K / N);
518 }
519 
520 private
521 @safe pure @nogc nothrow
522 T hypergeometricCCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
523         (const size_t k, const size_t N, const size_t K, const size_t n)
524     if (isFloatingPoint!T && 
525         (hypergeometricAlgo == HypergeometricAlgo.approxNormal || 
526          hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection))
527     in (K <= N, "K must be less than or equal to N")
528     in (n <= N, "n must be less than or equal to N")
529     in (k <= n, "n - k must be greater than or equal to zero")
530     in (k <= K, "k must be less than or equal to K")
531     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
532 {
533     import mir.math.common: sqrt;
534     import mir.stat.distribution.normal: normalCCDF;
535 
536     T l = k;
537     static if (hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection) {
538         l += 0.5;
539     }
540     return normalCCDF(l, cast(T) (n * K) / N, sqrt(n * (cast(T) K / N) * (cast(T) (N - K) / N) * (cast(T) (N - n) / (N - 1))));
541 }
542 
543 /++
544 Computes the hypergeometric complementary cumulative distribution function (CCDF).
545 
546 Additional algorithms may be provided for calculating CCDF that allow trading off
547 time and accuracy. If `approxPoisson` is provided, `PoissonAlgo.gamma` is assumed.
548 
549 Setting `hypergeometricAlgo = HypergeometricAlgo.direct` results in direct
550 summation being used, which can result in significant slowdowns for large values
551 of `k`. 
552 
553 Params:
554     k = value to evaluate CCDF (e.g. number of correct draws of object of interest)
555     N = total population size
556     K = number of relevant objects in population
557     n = number of draws
558 
559 See_also: $(LINK2 https://en.wikipedia.org/wiki/Hypergeometric_distribution, Hypergeometric Distribution)
560 +/
561 @safe pure @nogc nothrow
562 T hypergeometricCCDF(T, HypergeometricAlgo hypergeometricAlgo = HypergeometricAlgo.direct)
563         (const size_t k, const size_t N, const size_t K, const size_t n)
564     if (isFloatingPoint!T)
565     in (K <= N, "K must be less than or equal to N")
566     in (n <= N, "n must be less than or equal to N")
567     in (k <= n, "n - k must be greater than or equal to zero")
568     in (k <= K, "k must be less than or equal to K")
569     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
570 {
571     if (k == n || n == 0)
572         return 0;
573     return hypergeometricCCDFImpl!(T, hypergeometricAlgo)(k, N, K, n);
574 }
575 
576 /// ditto
577 template hypergeometricCCDF(HypergeometricAlgo hypergeometricAlgo = HypergeometricAlgo.direct)
578 {
579     alias hypergeometricCCDF = hypergeometricCCDF!(double, hypergeometricAlgo);
580 }
581 
582 /// ditto
583 @safe pure nothrow @nogc
584 template hypergeometricCCDF(T, string hypergeometricAlgo)
585 {
586     mixin("alias hypergeometricCCDF = .hypergeometricCCDF!(T, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
587 }
588 
589 /// ditto
590 @safe pure nothrow @nogc
591 template hypergeometricCCDF(string hypergeometricAlgo)
592 {
593     mixin("alias hypergeometricCCDF = .hypergeometricCCDF!(double, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
594 }
595 
596 ///
597 @safe pure @nogc nothrow
598 version(mir_stat_test)
599 unittest
600 {
601     import mir.test: shouldApprox;
602 
603     0.hypergeometricCCDF(7, 4, 3).shouldApprox == 0.9714286;
604     1.hypergeometricCCDF(7, 4, 3).shouldApprox == 0.6285714;
605     2.hypergeometricCCDF(7, 4, 3).shouldApprox == 0.1142857;
606     3.hypergeometricCCDF(7, 4, 3).shouldApprox == 0.0;
607 
608     // can also provide a template argument to change output type
609     static assert(is(typeof(hypergeometricCCDF!float(3, 7, 4, 3)) == float));
610 }
611 
612 // Check n=0 condition
613 @safe pure @nogc nothrow
614 version(mir_stat_test)
615 unittest
616 {
617     import mir.test: shouldApprox;
618 
619     0.hypergeometricCCDF(7, 4, 0).shouldApprox == 0.0;
620 }
621 
622 /// Alternate algorithms
623 @safe pure @nogc nothrow
624 version(mir_stat_test)
625 unittest
626 {
627     import mir.test: shouldApprox;
628     import mir.math.common: exp;
629 
630     // Can approximate hypergeometric with binomial distribution
631     20.hypergeometricCCDF!"approxBinomial"(750_000, 250_000, 50).shouldApprox(1e-2) == 0.1259161;
632     // Can approximate hypergeometric with poisson distribution
633     8.hypergeometricCCDF!"approxPoisson"(100_000, 100, 5_000).shouldApprox(1e-1) == 0.0629937;
634     // Can approximate hypergeometric with normal distribution
635     3_750.hypergeometricCCDF!"approxNormal"(10_000, 7_500, 5_000).shouldApprox(2e-2) == 0.4907878;
636     // Can approximate hypergeometric with normal distribution
637     3_750.hypergeometricCCDF!"approxNormalContinuityCorrection"(10_000, 7_500, 5_000).shouldApprox(1e-2) == 0.4907878;
638 }
639 
640 private
641 @safe pure nothrow @nogc
642 size_t hypergeometricInvCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
643         (const T p, const size_t N, const size_t K, const size_t n)
644     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.direct)
645     in (K <= N, "K must be less than or equal to N")
646     in (n <= N, "n must be less than or equal to N")
647     in (p >= 0, "p must be greater than or equal to 0")
648     in (p <= 1, "p must be less than or equal to 1")
649 {
650     size_t guess = 0;
651     if (n < 0.1 * N) {
652         if ((n * N > 9 * (N - K)) &&
653             (n * (N - K) > 9 * K)) {
654         	guess = hypergeometricInvCDFImpl!(T, HypergeometricAlgo.approxNormalContinuityCorrection)(p, N, K, n);
655     	} else if (K < 0.1 * N) {
656         	guess = hypergeometricInvCDFImpl!(T, HypergeometricAlgo.approxPoisson)(p, N, K, n);
657         } else {
658             guess = hypergeometricInvCDFImpl!(T, HypergeometricAlgo.approxBinomial)(p, N, K, n);
659         }
660     }
661     T cdfGuess = hypergeometricCDF!(T, hypergeometricAlgo)(guess, N, K, n);
662 
663     if (p <= cdfGuess) {
664         if (guess == 0) {
665             return guess;
666         }
667         for (size_t i = (guess - 1); guess >= 0; i--) {
668             cdfGuess -= hypergeometricPMF!(T, hypergeometricAlgo)(i + 1, N, K, n);
669             if (p > cdfGuess) {
670                 guess = i + 1;
671                 break;
672             }
673         }
674     } else {
675         while(p > cdfGuess) {
676             guess++;
677             cdfGuess += hypergeometricPMF!(T, hypergeometricAlgo)(guess, N, K, n);
678         }
679     }
680     return guess;
681 }
682 
683 private
684 @safe pure @nogc nothrow
685 size_t hypergeometricInvCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
686         (const T p, const size_t N, const size_t K, const size_t n)
687     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxBinomial)
688     in (K <= N, "K must be less than or equal to N")
689     in (n <= N, "n must be less than or equal to N")
690     in (p >= 0, "p must be greater than or equal to 0")
691     in (p <= 1, "p must be less than or equal to 1")
692 {
693     import mir.stat.distribution.binomial: binomialInvCDF;
694 
695     return binomialInvCDF(p, n, cast(T) K / N);
696 }
697 
698 private
699 @safe pure @nogc nothrow
700 size_t hypergeometricInvCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
701         (const T p, const size_t N, const size_t K, const size_t n)
702     if (isFloatingPoint!T && hypergeometricAlgo == HypergeometricAlgo.approxPoisson)
703     in (K <= N, "K must be less than or equal to N")
704     in (n <= N, "n must be less than or equal to N")
705     in (p >= 0, "p must be greater than or equal to 0")
706     in (p <= 1, "p must be less than or equal to 1")
707 {
708     import mir.stat.distribution.poisson: poissonInvCDF, PoissonAlgo;
709 
710     // Using PoissonAlgo.direct because PoissonAlgo.gamma does not return the
711     // same result
712     return poissonInvCDF!(PoissonAlgo.direct)(p, cast(T) n * K / N);
713 }
714 
715 private
716 @safe pure @nogc nothrow
717 size_t hypergeometricInvCDFImpl(T, HypergeometricAlgo hypergeometricAlgo)
718         (const T p, const size_t N, const size_t K, const size_t n)
719     if (isFloatingPoint!T && 
720         (hypergeometricAlgo == HypergeometricAlgo.approxNormal || 
721          hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection))
722     in (K <= N, "K must be less than or equal to N")
723     in (n <= N, "n must be less than or equal to N")
724     in (p >= 0, "p must be greater than or equal to 0")
725     in (p <= 1, "p must be less than or equal to 1")
726 {
727     import mir.math.common: floor, sqrt;
728     import mir.stat.distribution.normal: normalCDF, normalInvCDF;
729 
730     T location = cast(T) (n * K) / N;
731     T scale = sqrt(n * (cast(T) K / N) * (cast(T) (N - K) / N) * (cast(T) (N - n) / (N - 1)));
732 
733     // Handles case where p is small or large, better than just using pLowerBound = 0 or pUpperBound = 1
734     T pLowerBound = 0;
735     T pUpperBound = 1;
736     T lowerValue = 0;
737     T upperValue = K;
738     static if (hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection) {
739         lowerValue += 0.5;
740         upperValue -= 0.5;
741     }
742     pLowerBound = normalCDF(lowerValue, location, scale);
743     pUpperBound = normalCDF(upperValue, location, scale);
744     if (p <= pLowerBound) {
745         return 0;
746     } else if (p >= pUpperBound) {
747         return K;
748     }
749 
750     auto result = normalInvCDF(p, location, scale);
751     static if (hypergeometricAlgo == HypergeometricAlgo.approxNormalContinuityCorrection) {
752         result = result - 0.5;
753     }
754     return cast(size_t) floor(result);
755 }
756 
757 /++
758 Computes the hypergeometric inverse cumulative distribution function (InvCDF).
759 
760 Additional algorithms may be provided for calculating InvCDF that allow trading off
761 time and accuracy. If `approxPoisson` is provided, `PoissonAlgo.direct` is assumed.
762 This is different from other functions that use `PoissonAlgo.gamma` since in this
763 case it does not provide the same result.
764 
765 Setting `hypergeometricAlgo = HypergeometricAlgo.direct` results in direct
766 summation being used, which can result in significant slowdowns for large values
767 of `k`. 
768 
769 Params:
770     p = value to evaluate InvCDF
771     N = total population size
772     K = number of relevant objects in population
773     n = number of draws
774 
775 See_also: $(LINK2 https://en.wikipedia.org/wiki/Hypergeometric_distribution, Hypergeometric Distribution)
776 +/
777 @safe pure @nogc nothrow
778 size_t hypergeometricInvCDF(T, HypergeometricAlgo hypergeometricAlgo = HypergeometricAlgo.direct)
779         (const T p, const size_t N, const size_t K, const size_t n)
780     if (isFloatingPoint!T)
781     in (K <= N, "K must be less than or equal to N")
782     in (n <= N, "n must be less than or equal to N")
783     in (p >= 0, "p must be greater than or equal to 0")
784     in (p <= 1, "p must be less than or equal to 1")
785 {
786     if (n == 0 || p == 0) {
787         return 0;
788     } else if (p == 1) {
789         return K;
790     }
791     return hypergeometricInvCDFImpl!(T, hypergeometricAlgo)(p, N, K, n);
792 }
793 
794 /// ditto
795 @safe pure nothrow @nogc
796 template hypergeometricInvCDF(T, string hypergeometricAlgo)
797 {
798     mixin("alias hypergeometricInvCDF = .hypergeometricInvCDF!(T, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
799 }
800 
801 /// ditto
802 @safe pure nothrow @nogc
803 template hypergeometricInvCDF(string hypergeometricAlgo)
804 {
805     mixin("alias hypergeometricInvCDF = .hypergeometricInvCDF!(double, HypergeometricAlgo." ~ hypergeometricAlgo ~ ");");
806 }
807 
808 ///
809 @safe pure @nogc nothrow
810 version(mir_stat_test)
811 unittest
812 {
813     import mir.test: should;
814 
815     0.0.hypergeometricInvCDF(40, 15, 20).should == 0;
816     0.1.hypergeometricInvCDF(40, 15, 20).should == 6;
817     0.2.hypergeometricInvCDF(40, 15, 20).should == 6;
818     0.3.hypergeometricInvCDF(40, 15, 20).should == 7;
819     0.4.hypergeometricInvCDF(40, 15, 20).should == 7;
820     0.5.hypergeometricInvCDF(40, 15, 20).should == 7;
821     0.6.hypergeometricInvCDF(40, 15, 20).should == 8;
822     0.7.hypergeometricInvCDF(40, 15, 20).should == 8;
823     0.8.hypergeometricInvCDF(40, 15, 20).should == 9;
824     0.9.hypergeometricInvCDF(40, 15, 20).should == 9;
825     1.0.hypergeometricInvCDF(40, 15, 20).should == 15;
826 }
827 
828 // Check n=0 condition
829 @safe pure @nogc nothrow
830 version(mir_stat_test)
831 unittest
832 {
833     import mir.test: should;
834 
835     0.0.hypergeometricInvCDF(7, 4, 0).should == 0;
836     0.5.hypergeometricInvCDF(7, 4, 0).should == 0;
837     1.0.hypergeometricInvCDF(7, 4, 0).should == 0;
838 }
839 
840 /// Alternate algorithms
841 @safe pure @nogc nothrow
842 version(mir_stat_test)
843 unittest
844 {
845     import mir.test: shouldApprox;
846     import mir.math.common: exp;
847 
848     // Can approximate hypergeometric with binomial distribution
849     0.5.hypergeometricInvCDF!"approxBinomial"(750_000, 250_000, 50).shouldApprox!double == 17;
850     // Can approximate hypergeometric with poisson distribution
851     0.4.hypergeometricInvCDF!"approxPoisson"(100_000, 100, 5_000).shouldApprox!double == 4;
852     // Can approximate hypergeometric with normal distribution
853     0.6.hypergeometricInvCDF!"approxNormal"(10_000, 7_500, 5_000).shouldApprox!double == 3755;
854     // Can approximate hypergeometric with normal distribution
855     0.6.hypergeometricInvCDF!"approxNormalContinuityCorrection"(10_000, 7_500, 5_000).shouldApprox!double(1) == 3755;
856 }
857 
858 // test approxNormal / approxNormalContinuityCorrection
859 version(mir_stat_test)
860 @safe pure nothrow @nogc
861 unittest {
862     import mir.math.common: floor, sqrt;
863     import mir.stat.distribution.normal: normalInvCDF;
864     import mir.test: should;
865 
866     size_t N = 50;
867     size_t K = 15;
868     size_t n = 25;
869     double location = cast(double) (n * K) / N;
870     double scale = sqrt(n * (cast(double) K / N) * (cast(double) (N - K) / N) * (cast(double) (N - n) / (N - 1)));
871     0.0000001.hypergeometricInvCDF!"approxNormal"(N, K, n).should == 0;
872     0.0000001.hypergeometricInvCDF!"approxNormalContinuityCorrection"(N, K, n).should == 0;
873     0.9999999.hypergeometricInvCDF!"approxNormal"(N, K, n).should == K;
874     0.9999999.hypergeometricInvCDF!"approxNormalContinuityCorrection"(N, K, n).should == K;
875     double checkValue;
876     for (double x = 0.05; x < 1; x = x + 0.05) {
877         checkValue = normalInvCDF(x, location, scale);
878         x.hypergeometricInvCDF!"approxNormal"(N, K, n).should == floor(checkValue);
879         x.hypergeometricInvCDF!"approxNormalContinuityCorrection"(N, K, n).should == floor(checkValue - 0.5);
880     }
881 }
882 
883 // test alternate direct guess paths
884 version(mir_stat_test)
885 @safe pure nothrow @nogc
886 unittest {
887     import mir.math.common: approxEqual;
888 
889     static immutable size_t[] Ns = [120, 100, 200];
890     static immutable size_t[] Ks = [ 50,  50,   9];
891     static immutable size_t[] ns = [ 10,   9,   1];
892 
893     size_t value;
894     for (size_t i; i < Ns.length; i++) {
895         for (double x = 0.01; x < 1; x = x + 0.01) {
896             value = x.hypergeometricInvCDF(Ns[i], Ks[i], ns[i]);
897             assert(value.hypergeometricCDF(Ns[i], Ks[i], ns[i]) >= x);
898             if (value > 1)
899                 assert((value - 1).hypergeometricCDF(Ns[i], Ks[i], ns[i]) < x);
900         }
901     }
902 }
903 
904 /++
905 Computes the hypergeometric log probability mass function (LPMF).
906 
907 Params:
908     k = value to evaluate LPMF (e.g. number of correct draws of object of interest)
909     N = total population size
910     K = number of relevant objects in population
911     n = number of draws
912 
913 See_also: $(LINK2 https://en.wikipedia.org/wiki/Hypergeometric_distribution, Hypergeometric Distribution)
914 +/
915 @safe pure @nogc nothrow
916 T hypergeometricLPMF(T = double)(const size_t k, const size_t N, const size_t K, const size_t n)
917     if (isFloatingPoint!T)
918     in (K <= N, "K must be less than or equal to N")
919     in (n <= N, "n must be less than or equal to N")
920     in (k <= n, "n - k must be greater than or equal to zero")
921     in (k <= K, "k must be less than or equal to K")
922     in (k + N >= n + K, "`N - K` must be greater than or equal to `n - k`")
923 {
924     import mir.math.internal.log_binomial: logBinomialCoefficient;
925     
926     return logBinomialCoefficient(K, cast(const uint) k) + 
927            logBinomialCoefficient(N - K, cast(const uint) (n - k)) - 
928            logBinomialCoefficient(N, cast(const uint) n);
929 }
930 
931 ///
932 @safe pure @nogc nothrow
933 version(mir_stat_test)
934 unittest
935 {
936     import mir.math.common: log;
937     import mir.test: shouldApprox;
938 
939     0.hypergeometricLPMF(7, 4, 3).shouldApprox == log(0.02857143);
940     1.hypergeometricLPMF(7, 4, 3).shouldApprox == log(0.3428571);
941     2.hypergeometricLPMF(7, 4, 3).shouldApprox == log(0.5142857);
942     3.hypergeometricLPMF(7, 4, 3).shouldApprox == log(0.1142857);
943 }
The OpenD Programming Language