1 /++ 2 This module contains algorithms for the $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution). 3 4 There are multiple alternative formulations of the Negative Binomial Distribution. The 5 formulation in this module uses the number of Bernoulli trials until `r` successes. 6 7 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 8 9 Authors: John Michael Hall 10 11 Copyright: 2022-3 Mir Stat Authors. 12 13 +/ 14 15 module mir.stat.distribution.negative_binomial; 16 17 import mir.bignum.fp: Fp; 18 import mir.internal.utility: isFloatingPoint; 19 20 /++ 21 Computes the negative binomial probability mass function (PMF). 22 23 Params: 24 k = value to evaluate PMF (e.g. number of "heads") 25 r = number of successes until stopping 26 p = `true` probability 27 28 29 See_also: 30 $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution) 31 +/ 32 @safe pure nothrow @nogc 33 T negativeBinomialPMF(T)(const size_t k, const size_t r, const T p) 34 if (isFloatingPoint!T) 35 in (r > 0, "number of failures must be larger than zero") 36 in (p >= 0, "p must be greater than or equal to 0") 37 in (p <= 1, "p must be less than or equal to 1") 38 { 39 import mir.math.common: pow; 40 import mir.combinatorics: binomial; 41 42 return binomial(k + r - 1, r - 1) * pow(1 - p, k) * pow(p, r); 43 } 44 45 /// 46 version(mir_stat_test) 47 @safe pure nothrow @nogc 48 unittest { 49 import mir.test: shouldApprox; 50 51 4.negativeBinomialPMF(6, 3.0 / 4).shouldApprox == 0.0875988; 52 } 53 54 // 55 version(mir_stat_test) 56 @safe pure nothrow @nogc 57 unittest { 58 import mir.test: shouldApprox; 59 60 0.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.0877915; 61 1.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.175583; 62 2.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.2048468; 63 3.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.1820861; 64 4.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.1365645; 65 5.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.09104303; 66 6.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.05563741; 67 7.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.0317928; 68 8.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.0172211; 69 9.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.008929461; 70 10.negativeBinomialPMF(6, 2.0 / 3).shouldApprox == 0.00446473; 71 } 72 73 /++ 74 Computes the negative binomial probability mass function (PMF) directly with extended 75 floating point types (e.g. `Fp!128`), which provides additional accuracy for 76 extreme values of `k`, `r`, or `p`. 77 78 Params: 79 k = value to evaluate PMF (e.g. number of "heads") 80 r = number of successes until stopping 81 p = `true` probability 82 83 See_also: 84 $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution) 85 +/ 86 @safe pure nothrow @nogc 87 T fp_negativeBinomialPMF(T)(const size_t k, const size_t r, const T p) 88 if (is(T == Fp!size, size_t size)) 89 in (r > 0, "number of failures must be larger than zero") 90 in (cast(double) p >= 0, "p must be greater than or equal to 0") 91 in (cast(double) p <= 1, "p must be less than or equal to 1") 92 { 93 import mir.math.internal.fp_powi: fp_powi; 94 import mir.math.numeric: binomialCoefficient; 95 96 return binomialCoefficient(k + r - 1, cast(const uint) (r - 1)) * fp_powi(T(1 - cast(double) p), k) * fp_powi(p, r); 97 } 98 99 /// fp_binomialPMF provides accurate values for large values of `n` 100 version(mir_stat_test_fp) 101 @safe pure nothrow @nogc 102 unittest { 103 import mir.bignum.fp: Fp, fp_log; 104 import mir.test: shouldApprox; 105 106 1.fp_negativeBinomialPMF(1_000_000, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(1, 1_000_000, 0.75); 107 } 108 109 110 // test more values 111 version(mir_stat_test_fp) 112 @safe pure nothrow @nogc 113 unittest { 114 import mir.bignum.fp: Fp, fp_log; 115 import mir.test: shouldApprox; 116 117 enum size_t val = 1_000_000; 118 119 0.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(0, val + 5, 0.75); 120 1.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(1, val + 5, 0.75); 121 2.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(2, val + 5, 0.75); 122 5.fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(5, val + 5, 0.75); 123 (val / 2).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val / 2, val + 5, 0.75); 124 (val - 5).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val - 5, val + 5, 0.75); 125 (val - 2).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val - 2, val + 5, 0.75); 126 (val - 1).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val - 1, val + 5, 0.75); 127 (val - 0).fp_negativeBinomialPMF(val + 5, Fp!128(0.75)).fp_log!double.shouldApprox == negativeBinomialLPMF(val, val + 5, 0.75); 128 } 129 130 // using Fp!128 131 version(mir_stat_test) 132 @safe pure nothrow @nogc 133 unittest { 134 import mir.conv: to; 135 import mir.test: shouldApprox; 136 137 for (size_t i; i <= 5; i++) { 138 i.fp_negativeBinomialPMF(5, Fp!128(0.50)).to!double.shouldApprox == negativeBinomialPMF(i, 5, 0.50); 139 i.fp_negativeBinomialPMF(5, Fp!128(0.75)).to!double.shouldApprox == negativeBinomialPMF(i, 5, 0.75); 140 } 141 } 142 143 /++ 144 Computes the negative binomial cumulative distribution function (CDF). 145 146 Params: 147 k = value to evaluate CDF (e.g. number of "heads") 148 r = number of successes until stopping 149 p = `true` probability 150 151 See_also: 152 $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution) 153 +/ 154 @safe pure nothrow @nogc 155 T negativeBinomialCDF(T)(const size_t k, const size_t r, const T p) 156 if (isFloatingPoint!T) 157 in (r > 0, "number of failures must be larger than zero") 158 in (p >= 0, "p must be greater than or equal to 0") 159 in (p <= 1, "p must be less than or equal to 1") 160 { 161 import mir.math.common: pow; 162 import std.mathspecial: betaIncomplete; 163 164 if (k == 0) { 165 return pow(p, r); 166 } 167 return 1 - betaIncomplete(k + 1, r, 1 - p); 168 } 169 170 /// 171 version(mir_stat_test) 172 @safe pure nothrow @nogc 173 unittest { 174 import mir.test: shouldApprox; 175 176 4.negativeBinomialCDF(6, 3.0 / 4).shouldApprox == 0.9218731; 177 } 178 179 // test multiple 180 version(mir_stat_test) 181 @safe pure nothrow @nogc 182 unittest { 183 import mir.test: shouldApprox; 184 185 static double sumOfnegativeBinomialPMFs(T)(size_t k, size_t r, T p) { 186 double result = 0.0; 187 for (size_t i; i <= k; i++) { 188 result += negativeBinomialPMF(i, r, p); 189 } 190 return result; 191 } 192 193 for (size_t i; i <= 10; i++) { 194 i.negativeBinomialCDF(5, 0.25).shouldApprox == sumOfnegativeBinomialPMFs(i, 5, 0.25); 195 i.negativeBinomialCDF(5, 0.50).shouldApprox == sumOfnegativeBinomialPMFs(i, 5, 0.50); 196 i.negativeBinomialCDF(5, 0.75).shouldApprox == sumOfnegativeBinomialPMFs(i, 5, 0.75); 197 198 i.negativeBinomialCDF(6, 0.25).shouldApprox == sumOfnegativeBinomialPMFs(i, 6, 0.25); 199 i.negativeBinomialCDF(6, 0.5).shouldApprox == sumOfnegativeBinomialPMFs(i, 6, 0.5); 200 i.negativeBinomialCDF(6, 0.75).shouldApprox == sumOfnegativeBinomialPMFs(i, 6, 0.75); 201 } 202 } 203 204 /++ 205 Computes the negative binomial complementary cumulative distribution function (CCDF). 206 207 Params: 208 k = value to evaluate CCDF (e.g. number of "heads") 209 r = number of successes until stopping 210 p = `true` probability 211 212 See_also: 213 $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution) 214 +/ 215 @safe pure nothrow @nogc 216 T negativeBinomialCCDF(T)(const size_t k, const size_t r, const T p) 217 if (isFloatingPoint!T) 218 in (r > 0, "number of failures must be larger than zero") 219 in (p >= 0, "p must be greater than or equal to 0") 220 in (p <= 1, "p must be less than or equal to 1") 221 { 222 import mir.math.common: pow; 223 import std.mathspecial: betaIncomplete; 224 225 if (k == 0) { 226 return 1 - pow(p, r); 227 } 228 return betaIncomplete(k + 1, r, 1 - p); 229 } 230 231 /// 232 version(mir_stat_test) 233 @safe pure nothrow @nogc 234 unittest { 235 import mir.test: shouldApprox; 236 237 4.negativeBinomialCCDF(6, 3.0 / 4).shouldApprox == 0.07812691; 238 } 239 240 // test multiple 241 version(mir_stat_test) 242 @safe pure nothrow @nogc 243 unittest { 244 import mir.test: shouldApprox; 245 246 for (size_t i; i <= 10; i++) { 247 i.negativeBinomialCCDF(5, 0.25).shouldApprox == 1 - negativeBinomialCDF(i, 5, 0.25); 248 i.negativeBinomialCCDF(5, 0.50).shouldApprox == 1 - negativeBinomialCDF(i, 5, 0.50); 249 i.negativeBinomialCCDF(5, 0.75).shouldApprox == 1 - negativeBinomialCDF(i, 5, 0.75); 250 251 i.negativeBinomialCCDF(6, 0.25).shouldApprox == 1 - negativeBinomialCDF(i, 6, 0.25); 252 i.negativeBinomialCCDF(6, 0.5).shouldApprox == 1 - negativeBinomialCDF(i, 6, 0.5); 253 i.negativeBinomialCCDF(6, 0.75).shouldApprox == 1 - negativeBinomialCDF(i, 6, 0.75); 254 } 255 } 256 257 private 258 @safe pure nothrow @nogc 259 size_t negativeBinomialInvCDFSearch(T)(const size_t guess, ref T cdfGuess, const T q, const size_t r, const T p, const size_t searchIncrement) 260 if (isFloatingPoint!T) 261 in (r > 0, "number of failures must be larger than zero") 262 in (q >= 0, "q must be greater than or equal to 0") 263 in (q <= 1, "q must be less than or equal to 1") 264 in (p >= 0, "p must be greater than or equal to 0") 265 in (p <= 1, "p must be less than or equal to 1") 266 { 267 size_t guessNew = guess; 268 if (q <= cdfGuess) { 269 T cdfGuessPrevious; 270 while (guessNew > 0) { 271 cdfGuessPrevious = cdfGuess; 272 cdfGuess = negativeBinomialCDF(guessNew - searchIncrement, r, p); 273 if (q > cdfGuess) { 274 cdfGuess = cdfGuessPrevious; 275 break; 276 } 277 guessNew = guessNew > searchIncrement ? guessNew - searchIncrement : 0; 278 } 279 } else { 280 while (q > cdfGuess) { 281 guessNew = guessNew + searchIncrement; 282 cdfGuess = negativeBinomialCDF(guessNew, r, p); 283 } 284 } 285 return guessNew; 286 } 287 288 /++ 289 Computes the negative binomial inverse cumulative distribution function (InvCDF). 290 291 Params: 292 q = value to evaluate InvCDF 293 r = number of successes until stopping 294 p = `true` probability 295 296 See_also: 297 $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution) 298 +/ 299 @safe pure nothrow @nogc 300 size_t negativeBinomialInvCDF(T)(const T q, const size_t r, const T p) 301 if (isFloatingPoint!T) 302 in (r > 0, "number of failures must be larger than zero") 303 in (q >= 0, "q must be greater than or equal to 0") 304 in (q <= 1, "q must be less than or equal to 1") 305 in (p >= 0, "p must be greater than or equal to 0") 306 in (p <= 1, "p must be less than or equal to 1") 307 { 308 import mir.math.common: floor, sqrt; 309 import mir.stat.distribution.normal: normalInvCDF; 310 311 if (q == 0) { 312 return 0; 313 } else if (q == 1) { 314 return size_t.max; 315 } 316 317 size_t guess = 0; 318 T mu = r * (1 - p) / p; 319 T pre_std = sqrt(r * (1 - p)); 320 T std = pre_std / p; 321 T z = normalInvCDF(q); 322 if (r > 20 && p > 0.25 && p < 0.75) { 323 guess = cast(size_t) floor(mu + std * z - 0.5); 324 } else { 325 // Cornish-Fisher Approximation 326 guess = cast(size_t) floor(mu + std * (z + ((2 - p) / pre_std) * (z * z - 1) / 6)); 327 } 328 T cdfGuess = negativeBinomialCDF(guess, r, p); 329 330 if (guess < 10_000) { 331 return negativeBinomialInvCDFSearch(guess, cdfGuess, q, r, p, 1); 332 } else { 333 // Faster search for large values of guess 334 size_t searchIncrement = cast(size_t) floor(guess * 0.001); 335 size_t searchIncrementPrevious; 336 do { 337 searchIncrementPrevious = searchIncrement; 338 guess = negativeBinomialInvCDFSearch(guess, cdfGuess, q, r, p, searchIncrement); 339 searchIncrement = cast(size_t) floor(searchIncrement * 0.01); 340 } while (searchIncrementPrevious > 0 && searchIncrement > guess * (10 * T.epsilon)); 341 if (searchIncrementPrevious <= 1) { 342 return guess; 343 } else { 344 return negativeBinomialInvCDFSearch(guess, cdfGuess, q, r, p, 1); 345 } 346 } 347 } 348 349 /// 350 version(mir_stat_test) 351 @safe pure nothrow @nogc 352 unittest { 353 import mir.test: should; 354 0.9.negativeBinomialInvCDF(6, 3.0 / 4).should == 4; 355 } 356 357 // 358 version(mir_stat_test) 359 @safe pure nothrow 360 unittest { 361 import mir.test: should; 362 363 0.negativeBinomialInvCDF(5, 0.6).should == 0; 364 1.negativeBinomialInvCDF(5, 0.6).should == size_t.max; 365 366 for (double x = 0.05; x < 1; x = x + 0.05) { 367 size_t value = x.negativeBinomialInvCDF(5, 0.6); 368 value.negativeBinomialCDF(5, 0.6).should!"a >= b"(x); 369 if (value > 0) { 370 (value - 1).negativeBinomialCDF(5, 0.6).should!"a < b"(x); 371 } 372 } 373 } 374 375 // alternate guess paths 376 version(mir_stat_test) 377 @safe pure nothrow 378 unittest { 379 import mir.test: should; 380 381 static immutable size_t[] ns = [ 25, 37, 34, 25, 25, 105]; 382 static immutable double[] ps = [0.55, 0.2, 0.15, 0.05, 1.0e-8, 0.025]; 383 384 size_t value; 385 for (size_t i; i < 6; i++) { 386 for (double x = 0.05; x < 1; x = x + 0.05) { 387 value = x.negativeBinomialInvCDF(ns[i], ps[i]); 388 negativeBinomialCDF(value, ns[i], ps[i]).should!"a >= b"(x); 389 if (value > 0) { 390 negativeBinomialCDF(value - 1, ns[i], ps[i]).should!"a < b"(x); 391 } 392 } 393 } 394 } 395 396 // more detailed guess paths 397 version(mir_stat_test_binom_multi) 398 @safe pure nothrow 399 unittest { 400 import mir.test: should; 401 402 static immutable size_t[] ns = [ 25, 37, 34, 25, 25, 105]; 403 static immutable double[] ps = [0.55, 0.2, 0.15, 0.05, 1.0e-8, 0.025]; 404 405 size_t value; 406 for (size_t i; i < 6; i++) { 407 for (double x = 0.01; x < 1; x = x + 0.01) { 408 value = x.negativeBinomialInvCDF(ns[i], ps[i]); 409 negativeBinomialCDF(value, ns[i], ps[i]).should!"a >= b"(x); 410 if (value > 0) { 411 negativeBinomialCDF(value - 1, ns[i], ps[i]).should!"a < b"(x); 412 } 413 } 414 } 415 } 416 417 /++ 418 Computes the negative binomial log probability mass function (LPMF). 419 420 Params: 421 k = value to evaluate PMF (e.g. number of "heads") 422 r = number of successes until stopping 423 p = `true` probability 424 425 See_also: 426 $(LINK2 https://en.wikipedia.org/wiki/Negative_binomial_distribution, Negative Binomial Distribution) 427 +/ 428 @safe pure nothrow @nogc 429 T negativeBinomialLPMF(T)(const size_t k, const size_t r, const T p) 430 if (isFloatingPoint!T) 431 in (r > 0, "number of failures must be larger than zero") 432 in (p >= 0, "p must be greater than or equal to 0") 433 in (p <= 1, "p must be less than or equal to 1") 434 { 435 import mir.math.internal.xlogy: xlogy, xlog1py; 436 import mir.math.internal.log_binomial: logBinomialCoefficient; 437 438 return logBinomialCoefficient(k + r - 1, cast(const uint) (r - 1)) + xlog1py(k, -p) + xlogy(r, p); 439 } 440 441 /// 442 version(mir_stat_test) 443 @safe pure nothrow @nogc 444 unittest { 445 import mir.math.common: exp; 446 import mir.test: shouldApprox; 447 448 4.negativeBinomialLPMF(6, 3.0 / 4).exp.shouldApprox == 4.negativeBinomialPMF(6, 3.0 / 4); 449 } 450 451 // 452 version(mir_stat_test) 453 @safe pure nothrow @nogc 454 unittest { 455 import mir.math.common: exp; 456 import mir.test: shouldApprox; 457 458 for (size_t i; i <= 10; i++) { 459 i.negativeBinomialLPMF(5, 0.5).exp.shouldApprox == negativeBinomialPMF(i, 5, 0.5); 460 i.negativeBinomialLPMF(5, 0.75).exp.shouldApprox == negativeBinomialPMF(i, 5, 0.75); 461 } 462 } 463 464 /// Accurate values for large values of `n` 465 version(mir_stat_test_fp) 466 @safe pure nothrow @nogc 467 unittest { 468 import mir.bignum.fp: Fp, fp_log; 469 import mir.test: shouldApprox; 470 471 enum size_t val = 1_000_000; 472 473 1.negativeBinomialLPMF(1_000_000, 0.75).shouldApprox == fp_negativeBinomialPMF(1, 1_000_000, Fp!128(0.75)).fp_log!double; 474 } 475 476 // testing more values 477 version(mir_stat_test_fp) 478 @safe pure nothrow @nogc 479 unittest { 480 import mir.bignum.fp: Fp, fp_log; 481 import mir.test: shouldApprox; 482 483 enum size_t val = 1_000_000; 484 485 0.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(0, val + 5, Fp!128(0.75)).fp_log!double; 486 1.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(1, val + 5, Fp!128(0.75)).fp_log!double; 487 2.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(2, val + 5, Fp!128(0.75)).fp_log!double; 488 5.negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(5, val + 5, Fp!128(0.75)).fp_log!double; 489 (val / 2).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val / 2, val + 5, Fp!128(0.75)).fp_log!double; 490 (val - 5).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val - 5, val + 5, Fp!128(0.75)).fp_log!double; 491 (val - 2).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val - 2, val + 5, Fp!128(0.75)).fp_log!double; 492 (val - 1).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val - 1, val + 5, Fp!128(0.75)).fp_log!double; 493 (val - 0).negativeBinomialLPMF(val + 5, 0.75).shouldApprox == fp_negativeBinomialPMF(val, val + 5, Fp!128(0.75)).fp_log!double; 494 }