mir.stat.distribution.negative_binomial source code

1 /++
2 This module contains algorithms for the $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution).
3 
4 There are multiple alternative formulations of the Negative Binomial Distribution. The
5 formulation in this module uses the number of Bernoulli trials until `r` successes. 
6 
7 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
8 
9 Authors: John Michael Hall
10 
11 Copyright: 2022-3 Mir Stat Authors.
12 
13 +/
14 
15 module mir.stat.distribution.negative_binomial;
16 
17 import mir.bignum.fp: Fp;
18 import mir.internal.utility: isFloatingPoint;
19 
20 /++
21 Computes the negative binomial probability mass function (PMF).
22 
23 Params:
24     k = value to evaluate PMF (e.g. number of "heads")
25     r = number of successes until stopping
26     p = `true` probability
27 
28 
29 See_also:
30     $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution)
31 +/
32 @safe pure nothrow @nogc
33 T negativeBinomialPMF(T)(const size_t k, const size_t r, const T p)
34     if (isFloatingPoint!T)
35     in (r > 0, "number of failures must be larger than zero")
36     in (p >= 0, "p must be greater than or equal to 0")
37     in (p <= 1, "p must be less than or equal to 1")
38 {
39     import mir.math.common: pow;
40     import mir.combinatorics: binomial;
41 
42     return binomial(k + r - 1, r - 1) * pow(1 - p, k) * pow(p, r);
43 }
44 
45 ///
46 version(mir_stat_test)
47 @safe pure nothrow @nogc
48 unittest {
49     import mir.test: shouldApprox;
50 
51     4.negativeBinomialPMF(6, 3.0 / 4).shouldApprox == 0.0875988;
52 }
53 
54 //
55 version(mir_stat_test)
56 @safe pure nothrow @nogc
57 unittest {
58     import mir.test: shouldApprox;
59 
60     0.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.0877915;
61     1.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.175583;
62     2.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.2048468;
63     3.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.1820861;
64     4.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.1365645;
65     5.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.09104303;
66     6.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.05563741;
67     7.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.0317928;
68     8.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.0172211;
69     9.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.008929461;
70     10.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.00446473;
71 }
72 
73 /++
74 Computes the  negative binomial probability mass function (PMF) directly with extended 
75 floating point types (e.g. `Fp!128`), which provides additional accuracy for
76 extreme values of `k`, `r`, or `p`. 
77 
78 Params:
79     k = value to evaluate PMF (e.g. number of "heads")
80     r = number of successes until stopping
81     p = `true` probability
82 
83 See_also:
84     $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution)
85 +/
86 @safe pure nothrow @nogc
87 T fp_negativeBinomialPMF(T)(const size_t k, const size_t r, const T p)
88     if (is(T == Fp!size, size_t size))
89     in (r > 0, "number of failures must be larger than zero")
90     in (cast(double) p >= 0, "p must be greater than or equal to 0")
91     in (cast(double) p <= 1, "p must be less than or equal to 1")
92 {
93     import mir.math.internal.fp_powi: fp_powi;
94     import mir.math.numeric: binomialCoefficient;
95 
96     return binomialCoefficient(k + r - 1, cast(const uint) (r - 1)) * fp_powi(T(1 - cast(double) p), k) * fp_powi(p, r);
97 }
98 
99 /// fp_binomialPMF provides accurate values for large values of `n`
100 version(mir_stat_test_fp)
101 @safe pure nothrow @nogc
102 unittest {
103     import mir.bignum.fp: Fp, fp_log;
104     import mir.test: shouldApprox;
105 
106     1.fp_negativeBinomialPMF(1_000_000, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(1, 1_000_000, 0.75);
107 }
108 
109 
110 // test more values
111 version(mir_stat_test_fp)
112 @safe pure nothrow @nogc
113 unittest {
114     import mir.bignum.fp: Fp, fp_log;
115     import mir.test: shouldApprox;
116 
117     enum size_t val = 1_000_000;
118 
119     0.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(0, val + 5, 0.75);
120     1.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(1, val + 5, 0.75);
121     2.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(2, val + 5, 0.75);
122     5.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(5, val + 5, 0.75);
123     (val / 2).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val / 2, val + 5, 0.75);
124     (val - 5).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val - 5, val + 5, 0.75);
125     (val - 2).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val - 2, val + 5, 0.75);
126     (val - 1).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val - 1, val + 5, 0.75);
127     (val - 0).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val, val + 5, 0.75);
128 }
129 
130 // using Fp!128
131 version(mir_stat_test)
132 @safe pure nothrow @nogc
133 unittest {
134     import mir.conv: to;
135     import mir.test: shouldApprox;
136 
137     for (size_t i; i <= 5; i++) {
138         i.fp_negativeBinomialPMF(5, Fp!128(0.50)).to!double.shouldApprox == negativeBinomialPMF(i, 5, 0.50);
139         i.fp_negativeBinomialPMF(5, Fp!128(0.75)).to!double.shouldApprox == negativeBinomialPMF(i, 5, 0.75);
140     }
141 }
142 
143 /++
144 Computes the negative binomial cumulative distribution function (CDF).
145 
146 Params:
147     k = value to evaluate CDF (e.g. number of "heads")
148     r = number of successes until stopping
149     p = `true` probability
150 
151 See_also:
152     $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution)
153 +/
154 @safe pure nothrow @nogc
155 T negativeBinomialCDF(T)(const size_t k, const size_t r, const T p)
156     if (isFloatingPoint!T)
157     in (r > 0, "number of failures must be larger than zero")
158     in (p >= 0, "p must be greater than or equal to 0")
159     in (p <= 1, "p must be less than or equal to 1")
160 {
161     import mir.math.common: pow;
162     import std.mathspecial: betaIncomplete;
163 
164     if (k == 0) {
165         return pow(p, r);
166     }
167     return 1 - betaIncomplete(k + 1, r, 1 - p);
168 }
169 
170 ///
171 version(mir_stat_test)
172 @safe pure nothrow @nogc
173 unittest {
174     import mir.test: shouldApprox;
175 
176     4.negativeBinomialCDF(6, 3.0 / 4).shouldApprox == 0.9218731;
177 }
178 
179 // test multiple
180 version(mir_stat_test)
181 @safe pure nothrow @nogc
182 unittest {
183     import mir.test: shouldApprox;
184     
185     static double sumOfnegativeBinomialPMFs(T)(size_t k, size_t r, T p) {
186         double result = 0.0;
187         for (size_t i; i <= k; i++) {
188             result += negativeBinomialPMF(i, r, p);
189         }
190         return result;
191     }
192 
193     for (size_t i; i <= 10; i++) {
194         i.negativeBinomialCDF(5, 0.25).shouldApprox == sumOfnegativeBinomialPMFs(i, 5, 0.25);
195         i.negativeBinomialCDF(5, 0.50).shouldApprox == sumOfnegativeBinomialPMFs(i, 5, 0.50);
196         i.negativeBinomialCDF(5, 0.75).shouldApprox == sumOfnegativeBinomialPMFs(i, 5, 0.75);
197 
198         i.negativeBinomialCDF(6, 0.25).shouldApprox == sumOfnegativeBinomialPMFs(i, 6, 0.25);
199         i.negativeBinomialCDF(6, 0.5).shouldApprox == sumOfnegativeBinomialPMFs(i, 6, 0.5);
200         i.negativeBinomialCDF(6, 0.75).shouldApprox == sumOfnegativeBinomialPMFs(i, 6, 0.75);
201     }
202 }
203 
204 /++
205 Computes the negative binomial complementary cumulative distribution function (CCDF).
206 
207 Params:
208     k = value to evaluate CCDF (e.g. number of "heads")
209     r = number of successes until stopping
210     p = `true` probability
211 
212 See_also:
213     $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution)
214 +/
215 @safe pure nothrow @nogc
216 T negativeBinomialCCDF(T)(const size_t k, const size_t r, const T p)
217     if (isFloatingPoint!T)
218     in (r > 0, "number of failures must be larger than zero")
219     in (p >= 0, "p must be greater than or equal to 0")
220     in (p <= 1, "p must be less than or equal to 1")
221 {
222     import mir.math.common: pow;
223     import std.mathspecial: betaIncomplete;
224 
225     if (k == 0) {
226         return 1 - pow(p, r);
227     }
228     return betaIncomplete(k + 1, r, 1 - p);
229 }
230 
231 ///
232 version(mir_stat_test)
233 @safe pure nothrow @nogc
234 unittest {
235     import mir.test: shouldApprox;
236 
237     4.negativeBinomialCCDF(6, 3.0 / 4).shouldApprox == 0.07812691;
238 }
239 
240 // test multiple
241 version(mir_stat_test)
242 @safe pure nothrow @nogc
243 unittest {
244     import mir.test: shouldApprox;
245 
246     for (size_t i; i <= 10; i++) {
247         i.negativeBinomialCCDF(5, 0.25).shouldApprox == 1 - negativeBinomialCDF(i, 5, 0.25);
248         i.negativeBinomialCCDF(5, 0.50).shouldApprox == 1 - negativeBinomialCDF(i, 5, 0.50);
249         i.negativeBinomialCCDF(5, 0.75).shouldApprox == 1 - negativeBinomialCDF(i, 5, 0.75);
250 
251         i.negativeBinomialCCDF(6, 0.25).shouldApprox == 1 - negativeBinomialCDF(i, 6, 0.25);
252         i.negativeBinomialCCDF(6, 0.5).shouldApprox == 1 - negativeBinomialCDF(i, 6, 0.5);
253         i.negativeBinomialCCDF(6, 0.75).shouldApprox == 1 - negativeBinomialCDF(i, 6, 0.75);
254     }
255 }
256 
257 private
258 @safe pure nothrow @nogc
259 size_t negativeBinomialInvCDFSearch(T)(const size_t guess, ref T cdfGuess, const T q, const size_t r, const T p, const size_t searchIncrement)
260     if (isFloatingPoint!T)
261     in (r > 0, "number of failures must be larger than zero")
262     in (q >= 0, "q must be greater than or equal to 0")
263     in (q <= 1, "q must be less than or equal to 1")
264     in (p >= 0, "p must be greater than or equal to 0")
265     in (p <= 1, "p must be less than or equal to 1")
266 {
267     size_t guessNew = guess;
268     if (q <= cdfGuess) {
269         T cdfGuessPrevious;
270         while (guessNew > 0) {
271             cdfGuessPrevious = cdfGuess;
272             cdfGuess = negativeBinomialCDF(guessNew - searchIncrement, r, p);
273             if (q > cdfGuess) {
274                 cdfGuess = cdfGuessPrevious;
275                 break;
276             }
277             guessNew = guessNew > searchIncrement ? guessNew - searchIncrement : 0;
278         }
279     } else {
280         while (q > cdfGuess) {
281             guessNew = guessNew + searchIncrement;
282             cdfGuess = negativeBinomialCDF(guessNew, r, p);
283         }
284     }
285     return guessNew;
286 }
287 
288 /++
289 Computes the negative binomial inverse cumulative distribution function (InvCDF).
290 
291 Params:
292     q = value to evaluate InvCDF
293     r = number of successes until stopping
294     p = `true` probability
295 
296 See_also:
297     $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution)
298 +/
299 @safe pure nothrow @nogc
300 size_t negativeBinomialInvCDF(T)(const T q, const size_t r, const T p)
301     if (isFloatingPoint!T)
302     in (r > 0, "number of failures must be larger than zero")
303     in (q >= 0, "q must be greater than or equal to 0")
304     in (q <= 1, "q must be less than or equal to 1")
305     in (p >= 0, "p must be greater than or equal to 0")
306     in (p <= 1, "p must be less than or equal to 1")
307 {
308     import mir.math.common: floor, sqrt;
309     import mir.stat.distribution.normal: normalInvCDF;
310 
311     if (q == 0) {
312         return 0;
313     } else if (q == 1) {
314         return size_t.max;
315     }
316 
317     size_t guess = 0;
318     T mu = r * (1 - p) / p;
319     T pre_std = sqrt(r * (1 - p));
320     T std = pre_std / p;
321     T z = normalInvCDF(q);
322     if (r > 20 && p > 0.25 && p < 0.75) {
323         guess = cast(size_t) floor(mu + std * z - 0.5);
324     } else {
325         // Cornish-Fisher Approximation
326         guess = cast(size_t) floor(mu + std * (z + ((2 - p) / pre_std) * (z * z - 1) / 6));
327     }
328     T cdfGuess = negativeBinomialCDF(guess, r, p);
329 
330     if (guess < 10_000) {
331         return negativeBinomialInvCDFSearch(guess, cdfGuess, q, r, p, 1);
332     } else {
333         // Faster search for large values of guess
334         size_t searchIncrement = cast(size_t) floor(guess * 0.001);
335         size_t searchIncrementPrevious;
336         do {
337             searchIncrementPrevious = searchIncrement;
338             guess = negativeBinomialInvCDFSearch(guess, cdfGuess, q, r, p, searchIncrement);
339             searchIncrement = cast(size_t) floor(searchIncrement * 0.01);
340         } while (searchIncrementPrevious > 0 && searchIncrement > guess * (10 * T.epsilon));
341         if (searchIncrementPrevious <= 1) {
342             return guess;
343         } else {
344             return negativeBinomialInvCDFSearch(guess, cdfGuess, q, r, p, 1);
345         }
346     }
347 }
348 
349 ///
350 version(mir_stat_test)
351 @safe pure nothrow @nogc
352 unittest {
353     import mir.test: should;
354     0.9.negativeBinomialInvCDF(6, 3.0 / 4).should == 4;
355 }
356 
357 //
358 version(mir_stat_test)
359 @safe pure nothrow
360 unittest {
361     import mir.test: should;
362 
363     0.negativeBinomialInvCDF(5, 0.6).should == 0;
364     1.negativeBinomialInvCDF(5, 0.6).should == size_t.max;
365 
366     for (double x = 0.05; x < 1; x = x + 0.05) {
367         size_t value = x.negativeBinomialInvCDF(5, 0.6);
368         value.negativeBinomialCDF(5, 0.6).should!"a >= b"(x);
369         if (value > 0) {
370             (value - 1).negativeBinomialCDF(5, 0.6).should!"a < b"(x);
371         }
372     }
373 }
374 
375 // alternate guess paths
376 version(mir_stat_test)
377 @safe pure nothrow
378 unittest {
379     import mir.test: should;
380 
381     static immutable size_t[] ns = [  25,  37,  34,    25,     25,   105];
382     static immutable double[] ps = [0.55, 0.2, 0.15, 0.05, 1.0e-8, 0.025];
383 
384     size_t value;
385     for (size_t i; i < 6; i++) {
386         for (double x = 0.05; x < 1; x = x + 0.05) {
387             value = x.negativeBinomialInvCDF(ns[i], ps[i]);
388             negativeBinomialCDF(value, ns[i], ps[i]).should!"a >= b"(x);
389             if (value > 0) {
390                 negativeBinomialCDF(value - 1, ns[i], ps[i]).should!"a < b"(x);
391             }
392         }
393     }
394 }
395 
396 // more detailed guess paths
397 version(mir_stat_test_binom_multi)
398 @safe pure nothrow
399 unittest {
400     import mir.test: should;
401 
402     static immutable size_t[] ns = [  25,  37,  34,    25,     25,   105];
403     static immutable double[] ps = [0.55, 0.2, 0.15, 0.05, 1.0e-8, 0.025];
404 
405     size_t value;
406     for (size_t i; i < 6; i++) {
407         for (double x = 0.01; x < 1; x = x + 0.01) {
408             value = x.negativeBinomialInvCDF(ns[i], ps[i]);
409             negativeBinomialCDF(value, ns[i], ps[i]).should!"a >= b"(x);
410             if (value > 0) {
411                 negativeBinomialCDF(value - 1, ns[i], ps[i]).should!"a < b"(x);
412             }
413         }
414     }
415 }
416 
417 /++
418 Computes the negative binomial log probability mass function (LPMF).
419 
420 Params:
421     k = value to evaluate PMF (e.g. number of "heads")
422     r = number of successes until stopping
423     p = `true` probability
424 
425 See_also:
426     $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution)
427 +/
428 @safe pure nothrow @nogc
429 T negativeBinomialLPMF(T)(const size_t k, const size_t r, const T p)
430     if (isFloatingPoint!T)
431     in (r > 0, "number of failures must be larger than zero")
432     in (p >= 0, "p must be greater than or equal to 0")
433     in (p <= 1, "p must be less than or equal to 1")
434 {
435     import mir.math.internal.xlogy: xlogy, xlog1py;
436     import mir.math.internal.log_binomial: logBinomialCoefficient;
437 
438     return logBinomialCoefficient(k + r - 1, cast(const uint) (r - 1)) + xlog1py(k, -p) + xlogy(r, p);
439 }
440 
441 ///
442 version(mir_stat_test)
443 @safe pure nothrow @nogc
444 unittest {
445     import mir.math.common: exp;
446     import mir.test: shouldApprox;
447 
448     4.negativeBinomialLPMF(6, 3.0 / 4).exp.shouldApprox == 4.negativeBinomialPMF(6, 3.0 / 4);
449 }
450 
451 //
452 version(mir_stat_test)
453 @safe pure nothrow @nogc
454 unittest {
455     import mir.math.common: exp;
456     import mir.test: shouldApprox;
457 
458     for (size_t i; i <= 10; i++) {
459         i.negativeBinomialLPMF(5, 0.5).exp.shouldApprox == negativeBinomialPMF(i, 5, 0.5);
460         i.negativeBinomialLPMF(5, 0.75).exp.shouldApprox == negativeBinomialPMF(i, 5, 0.75);
461     }
462 }
463 
464 /// Accurate values for large values of `n`
465 version(mir_stat_test_fp)
466 @safe pure nothrow @nogc
467 unittest {
468     import mir.bignum.fp: Fp, fp_log;
469     import mir.test: shouldApprox;
470 
471     enum size_t val = 1_000_000;
472 
473     1.negativeBinomialLPMF(1_000_000, 0.75).shouldApprox == fp_negativeBinomialPMF(1, 1_000_000, Fp!128(0.75)).fp_log!double;
474 }
475 
476 // testing more values
477 version(mir_stat_test_fp)
478 @safe pure nothrow @nogc
479 unittest {
480     import mir.bignum.fp: Fp, fp_log;
481     import mir.test: shouldApprox;
482 
483     enum size_t val = 1_000_000;
484 
485     0.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(0, val + 5, Fp!128(0.75)).fp_log!double;
486     1.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(1, val + 5, Fp!128(0.75)).fp_log!double;
487     2.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(2, val + 5, Fp!128(0.75)).fp_log!double;
488     5.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(5, val + 5, Fp!128(0.75)).fp_log!double;
489     (val / 2).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val / 2, val + 5, Fp!128(0.75)).fp_log!double;
490     (val - 5).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val - 5, val + 5, Fp!128(0.75)).fp_log!double;
491     (val - 2).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val - 2, val + 5, Fp!128(0.75)).fp_log!double;
492     (val - 1).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val - 1, val + 5, Fp!128(0.75)).fp_log!double;
493     (val - 0).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val, val + 5, Fp!128(0.75)).fp_log!double;
494 }
The OpenD Programming Language