1 /++ 2 This module contains algorithms for the $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution). 3 4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 5 6 Authors: John Michael Hall 7 8 Copyright: 2022-3 Mir Stat Authors. 9 10 +/ 11 12 module mir.stat.distribution.binomial; 13 14 import mir.bignum.fp: Fp; 15 import mir.internal.utility: isFloatingPoint; 16 import mir.stat.distribution.poisson: PoissonAlgo; 17 18 /++ 19 Algorithms used to calculate binomial distribution. 20 21 `BinomialAlgo.direct` can be more time-consuming for large values of the number 22 of events (`k`) or the number of trials (`n`). Additional algorithms are 23 provided to the user to choose the trade-off between running time and accuracy. 24 25 See_also: 26 $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution) 27 +/ 28 enum BinomialAlgo { 29 /++ 30 Direct 31 +/ 32 direct, 33 /++ 34 Approximates binomial distribution with normal distribution. Generally a better approximation when 35 `n > 20` and `p` is far from 0 or 1, but a variety of rules of thumb can help determine when it is 36 appropriate to use. 37 +/ 38 approxNormal, 39 /++ 40 Approximates binomial distribution with normal distribution (including continuity correction). More 41 accurate than `BinomialAlgo.approxNormal`. 42 +/ 43 approxNormalContinuityCorrection, 44 /++ 45 Approximates binomial distribution with poisson distribution (also requires specifying poissonAlgo). 46 Generally a better approximation when `n >= 20` and `p <= 0.05` or when `n >= 100` and `np <= 10`. 47 +/ 48 approxPoisson 49 } 50 51 private 52 @safe pure nothrow @nogc 53 T binomialPMFImpl(T, BinomialAlgo binomialAlgo)(const size_t k, const size_t n, const T p) 54 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.direct) 55 in (k <= n, "k must be less than or equal to n") 56 in (p >= 0, "p must be greater than or equal to 0") 57 in (p <= 1, "p must be less than or equal to 1") 58 { 59 import mir.math.common: pow; 60 import mir.combinatorics: binomial; 61 62 return binomial(n, k) * pow(p, k) * pow(1 - p, n - k); 63 } 64 65 private 66 @safe pure nothrow @nogc 67 T binomialPMFImpl(T, BinomialAlgo binomialAlgo)(const size_t k, const size_t n, const T p) 68 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.approxNormal) 69 in (k <= n, "k must be less than or equal to n") 70 in (p >= 0, "p must be greater than or equal to 0") 71 in (p <= 1, "p must be less than or equal to 1") 72 { 73 import mir.math.common: sqrt; 74 import mir.stat.distribution.normal: normalPDF; 75 76 return normalPDF(k, n * p, sqrt(n * p * (1 - p))); 77 } 78 79 private 80 @safe pure nothrow @nogc 81 T binomialPMFImpl(T, BinomialAlgo binomialAlgo)(const size_t k, const size_t n, const T p) 82 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection) 83 in (k <= n, "k must be less than or equal to n") 84 in (p >= 0, "p must be greater than or equal to 0") 85 in (p <= 1, "p must be less than or equal to 1") 86 { 87 import mir.math.common: sqrt; 88 import mir.stat.distribution.normal: normalCDF; 89 90 return normalCDF(cast(T) k + 0.5, n * p, sqrt(n * p * (1 - p))) - normalCDF(cast(T) k - 0.5, n * p, sqrt(n * p * (1 - p))); 91 } 92 93 private 94 @safe pure nothrow @nogc 95 T binomialPMFImpl(T, BinomialAlgo binomialAlgo, PoissonAlgo poissonAlgo)(const size_t k, const size_t n, const T p) 96 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.approxPoisson) 97 in (k <= n, "k must be less than or equal to n") 98 in (p >= 0, "p must be greater than or equal to 0") 99 in (p <= 1, "p must be less than or equal to 1") 100 { 101 import mir.stat.distribution.poisson: poissonPMF; 102 103 return poissonPMF!poissonAlgo(k, n * p); 104 } 105 106 /++ 107 Computes the binomial probability mass function (PMF). 108 109 Additional algorithms may be provided for calculating PMF that allow trading off 110 time and accuracy. If `approxPoisson` is provided, the default is `PoissonAlgo.gamma` 111 112 Params: 113 binomialAlgo = algorithm for calculating PMF (default: BinomialAlgo.direct) 114 poissonAlgo = algorithm for poisson approximation (default: PoissonAlgo.gamma) 115 116 See_also: 117 $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution) 118 +/ 119 @safe pure nothrow @nogc 120 template binomialPMF(BinomialAlgo binomialAlgo = BinomialAlgo.direct, 121 PoissonAlgo poissonAlgo = PoissonAlgo.gamma) 122 { 123 /++ 124 Params: 125 k = value to evaluate PMF (e.g. number of "heads") 126 n = number of trials 127 p = `true` probability 128 +/ 129 T binomialPMF(T)(const size_t k, const size_t n, const T p) 130 if (isFloatingPoint!T) 131 in (k <= n, "k must be less than or equal to n") 132 in (p >= 0, "p must be greater than or equal to 0") 133 in (p <= 1, "p must be less than or equal to 1") 134 { 135 static if (binomialAlgo != BinomialAlgo.approxPoisson) 136 return binomialPMFImpl!(T, binomialAlgo)(k, n, p); 137 else 138 return binomialPMFImpl!(T, binomialAlgo, poissonAlgo)(k, n, p); 139 } 140 } 141 142 /// ditto 143 @safe pure nothrow @nogc 144 template binomialPMF(string binomialAlgo, string poissonAlgo = "gamma") 145 { 146 mixin("alias binomialPMF = .binomialPMF!(BinomialAlgo." ~ binomialAlgo ~ ", PoissonAlgo." ~ poissonAlgo ~ ");"); 147 } 148 149 /// 150 version(mir_stat_test) 151 @safe pure nothrow @nogc 152 unittest { 153 import mir.math.common: approxEqual, pow; 154 155 assert(4.binomialPMF(6, 2.0 / 3).approxEqual(15.0 * pow(2.0 / 3, 4) * pow(1.0 / 3, 2))); 156 // For large values of `n` with `p` not too extreme, can approximate with normal distribution 157 assert(550_000.binomialPMF!"approxNormal"(1_000_000, 0.55).approxEqual(0.0008019042)); 158 // Or closer with continuity correction 159 assert(550_000.binomialPMF!"approxNormalContinuityCorrection"(1_000_000, 0.55).approxEqual(0.000801904)); 160 // Poisson approximation is better when `p` is low 161 assert(10_000.binomialPMF!"approxPoisson"(1_000_000, 0.01).approxEqual(0.00398939)); 162 } 163 164 // test multiple 165 version(mir_stat_test) 166 @safe pure nothrow @nogc 167 unittest { 168 import mir.math.common: approxEqual, pow; 169 import mir.combinatorics: binomial; 170 171 for (size_t i; i <= 5; i++) { 172 assert(i.binomialPMF(5, 0.25).approxEqual(binomial(5, i) * pow(0.25, i) * pow(0.75, 5 - i))); 173 assert(i.binomialPMF(5, 0.50).approxEqual(binomial(5, i) * pow(0.50, 5))); 174 assert(i.binomialPMF(5, 0.75).approxEqual(binomial(5, i) * pow(0.75, i) * pow(0.25, 5 - i))); 175 } 176 } 177 178 // test BinomialAlgo.approxNormal / approxNormalContinuityCorrection / approxPoisson 179 version(mir_stat_test) 180 @safe pure nothrow @nogc 181 unittest { 182 import mir.math.common: approxEqual, sqrt; 183 import mir.stat.distribution.normal: normalCDF, normalPDF; 184 import mir.stat.distribution.poisson: poissonPMF; 185 186 for (size_t i; i < 5; i++) { 187 assert(i.binomialPMF!"approxNormal"(5, 0.75).approxEqual(normalPDF(i, 5.0 * 0.75, sqrt(5.0 * 0.75 * 0.25)))); 188 assert(i.binomialPMF!"approxNormalContinuityCorrection"(5, 0.75).approxEqual(normalCDF(i + 0.5, 5.0 * 0.75, sqrt(5.0 * 0.75 * 0.25)) - normalCDF(i - 0.5, 5.0 * 0.75, sqrt(5.0 * 0.75 * 0.25)))); 189 assert(i.binomialPMF!"approxPoisson"(5, 0.75).approxEqual(poissonPMF!"gamma"(i, 5 * 0.75))); 190 assert(i.binomialPMF!("approxPoisson", "direct")(5, 0.75).approxEqual(poissonPMF(i, 5 * 0.75))); 191 } 192 } 193 194 /++ 195 Computes the binomial probability mass function (PMF) directly with extended 196 floating point types (e.g. `Fp!128`), which provides additional accuracy for 197 large values of `k`, `n`, or `p`. 198 199 Params: 200 k = value to evaluate PMF (e.g. number of "heads") 201 n = number of trials 202 p = `true` probability 203 204 See_also: 205 $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution) 206 +/ 207 @safe pure nothrow @nogc 208 T fp_binomialPMF(T)(const size_t k, const size_t n, const T p) 209 if (is(T == Fp!size, size_t size)) 210 in (k <= n, "k must be less than or equal to n") 211 in (cast(double) p >= 0, "p must be greater than or equal to 0") 212 in (cast(double) p <= 1, "p must be less than or equal to 1") 213 { 214 import mir.math.internal.fp_powi: fp_powi; 215 import mir.math.numeric: binomialCoefficient; 216 217 return binomialCoefficient(n, cast(const uint) k) * fp_powi(p, k) * fp_powi(T(1 - cast(double) p), n - k); 218 } 219 220 /// fp_binomialPMF provides accurate values for large values of `n` 221 version(mir_stat_test_fp) 222 @safe pure nothrow @nogc 223 unittest { 224 import mir.bignum.fp: Fp, fp_log; 225 import mir.math.common: approxEqual; 226 227 assert(1.fp_binomialPMF(1_000_000, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(1, 1_000_000, 0.75))); 228 } 229 230 // more values to test 231 version(mir_stat_test_fp) 232 @safe pure nothrow @nogc 233 unittest { 234 import mir.bignum.fp: Fp, fp_log; 235 import mir.math.common: approxEqual; 236 237 enum size_t val = 1_000_000; 238 239 assert(0.fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(0, val + 5, 0.75))); 240 assert(1.fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(1, val + 5, 0.75))); 241 assert(2.fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(2, val + 5, 0.75))); 242 assert(5.fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(5, val + 5, 0.75))); 243 assert((val / 2).fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(val / 2, val + 5, 0.75))); 244 assert((val - 5).fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(val - 5, val + 5, 0.75))); 245 assert((val - 2).fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(val - 2, val + 5, 0.75))); 246 assert((val - 1).fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(val - 1, val + 5, 0.75))); 247 assert((val - 0).fp_binomialPMF(val + 5, Fp!128(0.75)).fp_log!double.approxEqual(binomialLPMF(val, val + 5, 0.75))); 248 } 249 250 // using Fp!128 251 version(mir_stat_test) 252 @safe pure nothrow @nogc 253 unittest { 254 import mir.conv: to; 255 import mir.math.common: approxEqual; 256 257 for (size_t i; i <= 5; i++) { 258 assert(i.fp_binomialPMF(5, Fp!128(0.50)).to!double.approxEqual(binomialPMF(i, 5, 0.50))); 259 assert(i.fp_binomialPMF(5, Fp!128(0.75)).to!double.approxEqual(binomialPMF(i, 5, 0.75))); 260 } 261 } 262 263 private 264 @safe pure nothrow @nogc 265 T binomialCDFImpl(T, BinomialAlgo binomialAlgo)(const size_t k, const size_t n, const T p) 266 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.direct) 267 in (k <= n, "k must be less than or equal to n") 268 in (p >= 0, "p must be greater than or equal to 0") 269 in (p <= 1, "p must be less than or equal to 1") 270 { 271 import mir.math.common: pow; 272 273 if (k == n) { 274 return 1; 275 } else if (k == 0) { 276 return pow(1 - p, n); 277 } else if (k <= n / 2 + 1) { 278 T result = 0; 279 280 foreach (size_t i; 0 .. (k + 1)) { 281 result += binomialPMFImpl!(T, binomialAlgo)(i, n, p); 282 } 283 284 return result; 285 } else { 286 return 1 - binomialCDFImpl!(T, binomialAlgo)(n - k - 1, n, 1 - p); 287 } 288 } 289 290 private 291 @safe pure nothrow @nogc 292 T binomialCDFImpl(T, BinomialAlgo binomialAlgo)(const size_t k, const size_t n, const T p) 293 if (isFloatingPoint!T && 294 (binomialAlgo == BinomialAlgo.approxNormal || 295 binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection)) 296 in (k <= n, "k must be less than or equal to n") 297 in (p >= 0, "p must be greater than or equal to 0") 298 in (p <= 1, "p must be less than or equal to 1") 299 { 300 import mir.math.common: sqrt; 301 import mir.stat.distribution.normal: normalCDF; 302 303 T l = k; 304 static if (binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection) { 305 l += 0.5; 306 } 307 return normalCDF(l, n * p, sqrt(n * p * (1 - p))); 308 } 309 310 private 311 @safe pure nothrow @nogc 312 T binomialCDFImpl(T, BinomialAlgo binomialAlgo, PoissonAlgo poissonAlgo)(const size_t k, const size_t n, const T p) 313 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.approxPoisson) 314 in (k <= n, "k must be less than or equal to n") 315 in (p >= 0, "p must be greater than or equal to 0") 316 in (p <= 1, "p must be less than or equal to 1") 317 { 318 import mir.stat.distribution.poisson: poissonCDF; 319 320 return poissonCDF!poissonAlgo(k, n * p); 321 } 322 323 /++ 324 Computes the binomial cumulative distribution function (CDF). 325 326 Additional algorithms may be provided for calculating CDF that allow trading off 327 time and accuracy. If `approxPoisson` is provided, the default is `PoissonAlgo.gamma` 328 329 Params: 330 binomialAlgo = algorithm for calculating CDF (default: BinomialAlgo.direct) 331 poissonAlgo = algorithm for poisson approximation (default: PoissonAlgo.gamma) 332 333 See_also: 334 $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution) 335 +/ 336 @safe pure nothrow @nogc 337 template binomialCDF(BinomialAlgo binomialAlgo = BinomialAlgo.direct, 338 PoissonAlgo poissonAlgo = PoissonAlgo.gamma) 339 { 340 /++ 341 Params: 342 k = value to evaluate CDF (e.g. number of "heads") 343 n = number of trials 344 p = `true` probability 345 +/ 346 T binomialCDF(T)(const size_t k, const size_t n, const T p) 347 if (isFloatingPoint!T) 348 in (k <= n, "k must be less than or equal to n") 349 in (p >= 0, "p must be greater than or equal to 0") 350 in (p <= 1, "p must be less than or equal to 1") 351 { 352 static if (binomialAlgo != BinomialAlgo.approxPoisson) 353 return binomialCDFImpl!(T, binomialAlgo)(k, n, p); 354 else 355 return binomialCDFImpl!(T, binomialAlgo, poissonAlgo)(k, n, p); 356 } 357 } 358 359 /// ditto 360 @safe pure nothrow @nogc 361 template binomialCDF(string binomialAlgo, string poissonAlgo = "gamma") 362 { 363 mixin("alias binomialCDF = .binomialCDF!(BinomialAlgo." ~ binomialAlgo ~ ", PoissonAlgo." ~ poissonAlgo ~ ");"); 364 } 365 366 /// 367 version(mir_stat_test) 368 @safe pure nothrow @nogc 369 unittest { 370 import mir.math.common: approxEqual, pow; 371 372 assert(4.binomialCDF(6, 2.0 / 3).approxEqual(binomialPMF(0, 6, 2.0 / 3) + binomialPMF(1, 6, 2.0 / 3) + binomialPMF(2, 6, 2.0 / 3) + binomialPMF(3, 6, 2.0 / 3) + binomialPMF(4, 6, 2.0 / 3))); 373 // For large values of `n` with `p` not too extreme, can approximate with normal distribution 374 assert(550_000.binomialCDF!"approxNormal"(1_000_000, 0.55).approxEqual(0.5)); 375 // Or closer with continuity correction 376 assert(550_000.binomialCDF!"approxNormalContinuityCorrection"(1_000_000, 0.55).approxEqual(0.500401)); 377 // Poisson approximation is better when `p` is low 378 assert(10_000.binomialCDF!"approxPoisson"(1_000_000, 0.01).approxEqual(0.5026596)); 379 } 380 381 // test multiple direct 382 version(mir_stat_test) 383 @safe pure nothrow @nogc 384 unittest { 385 import mir.math.common: approxEqual; 386 387 static double sumOfbinomialPMFs(T)(size_t k, size_t n, T p) { 388 double result = 0.0; 389 for (size_t i; i <= k; i++) { 390 result += binomialPMF(i, n, p); 391 } 392 return result; 393 } 394 395 // n = 5 396 for (size_t i; i <= 5; i++) { 397 assert(i.binomialCDF(5, 0.25).approxEqual(sumOfbinomialPMFs(i, 5, 0.25))); 398 assert(i.binomialCDF(5, 0.50).approxEqual(sumOfbinomialPMFs(i, 5, 0.50))); 399 assert(i.binomialCDF(5, 0.75).approxEqual(sumOfbinomialPMFs(i, 5, 0.75))); 400 } 401 402 // n = 6 403 for (size_t i; i <= 6; i++) { 404 assert(i.binomialCDF(6, 0.25).approxEqual(sumOfbinomialPMFs(i, 6, 0.25))); 405 assert(i.binomialCDF(6, 0.5).approxEqual(sumOfbinomialPMFs(i, 6, 0.5))); 406 assert(i.binomialCDF(6, 0.75).approxEqual(sumOfbinomialPMFs(i, 6, 0.75))); 407 } 408 } 409 410 // test BinomialAlgo.approxNormal / approxNormalContinuityCorrection / approxPoisson 411 version(mir_stat_test) 412 @safe pure nothrow @nogc 413 unittest { 414 import mir.math.common: approxEqual, sqrt; 415 import mir.stat.distribution.normal: normalCDF; 416 import mir.stat.distribution.poisson: poissonCDF; 417 418 for (size_t i; i < 5; i++) { 419 assert(i.binomialCDF!"approxNormal"(5, 0.75).approxEqual(normalCDF(i, 5.0 * 0.75, sqrt(5.0 * 0.75 * 0.25)))); 420 assert(i.binomialCDF!"approxNormalContinuityCorrection"(5, 0.75).approxEqual(normalCDF(i + 0.5, 5.0 * 0.75, sqrt(5.0 * 0.75 * 0.25)))); 421 assert(i.binomialCDF!"approxPoisson"(5, 0.75).approxEqual(poissonCDF!"gamma"(i, 5 * 0.75))); 422 assert(i.binomialCDF!("approxPoisson", "direct")(5, 0.75).approxEqual(poissonCDF(i, 5 * 0.75))); 423 } 424 } 425 426 private 427 @safe pure nothrow @nogc 428 T binomialCCDFImpl(T, BinomialAlgo binomialAlgo)(const size_t k, const size_t n, const T p) 429 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.direct) 430 in (k <= n, "k must be less than or equal to n") 431 in (p >= 0, "p must be greater than or equal to 0") 432 in (p <= 1, "p must be less than or equal to 1") 433 { 434 import mir.math.common: pow; 435 436 if (k == n) { 437 return 0; 438 } else if (k == 0) { 439 return 1 - pow(1 - p, n); 440 } else if (k >= n / 2) { 441 T result = 0; 442 443 foreach (size_t i; (k + 1) .. (n + 1)) { 444 result += binomialPMFImpl!(T, binomialAlgo)(i, n, p); 445 } 446 447 return result; 448 } else { 449 return 1 - binomialCCDFImpl!(T, binomialAlgo)(n - k - 1, n, 1 - p); 450 } 451 } 452 453 private 454 @safe pure nothrow @nogc 455 T binomialCCDFImpl(T, BinomialAlgo binomialAlgo)(const size_t k, const size_t n, const T p) 456 if (isFloatingPoint!T && 457 (binomialAlgo == BinomialAlgo.approxNormal || 458 binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection)) 459 in (k <= n, "k must be less than or equal to n") 460 in (p >= 0, "p must be greater than or equal to 0") 461 in (p <= 1, "p must be less than or equal to 1") 462 { 463 import mir.math.common: sqrt; 464 import mir.stat.distribution.normal: normalCCDF; 465 466 T l = k; 467 static if (binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection) { 468 l += 0.5; 469 } 470 return normalCCDF(l, n * p, sqrt(n * p * (1 - p))); 471 } 472 473 private 474 @safe pure nothrow @nogc 475 T binomialCCDFImpl(T, BinomialAlgo binomialAlgo, PoissonAlgo poissonAlgo)(const size_t k, const size_t n, const T p) 476 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.approxPoisson) 477 in (k <= n, "k must be less than or equal to n") 478 in (p >= 0, "p must be greater than or equal to 0") 479 in (p <= 1, "p must be less than or equal to 1") 480 { 481 import mir.stat.distribution.poisson: poissonCCDF; 482 483 return poissonCCDF!poissonAlgo(k, n * p); 484 } 485 486 /++ 487 Computes the binomial complementary cumulative distribution function (CCDF). 488 489 Additional algorithms may be provided for calculating CCDF that allow trading off 490 time and accuracy. If `approxPoisson` is provided, the default is `PoissonAlgo.gamma` 491 492 Params: 493 binomialAlgo = algorithm for calculating CCDF (default: BinomialAlgo.direct) 494 poissonAlgo = algorithm for poisson approximation (default: PoissonAlgo.gamma) 495 496 See_also: 497 $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution) 498 +/ 499 @safe pure nothrow @nogc 500 template binomialCCDF(BinomialAlgo binomialAlgo = BinomialAlgo.direct, 501 PoissonAlgo poissonAlgo = PoissonAlgo.gamma) 502 { 503 /++ 504 Params: 505 k = value to evaluate CCDF (e.g. number of "heads") 506 n = number of trials 507 p = `true` probability 508 +/ 509 T binomialCCDF(T)(const size_t k, const size_t n, const T p) 510 if (isFloatingPoint!T) 511 in (k <= n, "k must be less than or equal to n") 512 in (p >= 0, "p must be greater than or equal to 0") 513 in (p <= 1, "p must be less than or equal to 1") 514 { 515 static if (binomialAlgo != BinomialAlgo.approxPoisson) 516 return binomialCCDFImpl!(T, binomialAlgo)(k, n, p); 517 else 518 return binomialCCDFImpl!(T, binomialAlgo, poissonAlgo)(k, n, p); 519 } 520 } 521 522 /// ditto 523 @safe pure nothrow @nogc 524 template binomialCCDF(string binomialAlgo, string poissonAlgo = "gamma") 525 { 526 mixin("alias binomialCCDF = .binomialCCDF!(BinomialAlgo." ~ binomialAlgo ~ ", PoissonAlgo." ~ poissonAlgo ~ ");"); 527 } 528 529 /// 530 version(mir_stat_test) 531 @safe pure nothrow @nogc 532 unittest { 533 import mir.math.common: approxEqual, pow; 534 535 assert(4.binomialCCDF(6, 2.0 / 3).approxEqual(binomialPMF(5, 6, 2.0 / 3) + binomialPMF(6, 6, 2.0 / 3))); 536 // For large values of `n` with `p` not too extreme, can approximate with normal distribution 537 assert(550_000.binomialCCDF!"approxNormal"(1_000_000, 0.55).approxEqual(0.5)); 538 // Or closer with continuity correction 539 assert(550_000.binomialCCDF!"approxNormalContinuityCorrection"(1_000_000, 0.55).approxEqual(0.499599)); 540 // Poisson approximation is better when `p` is low 541 assert(10_000.binomialCCDF!"approxPoisson"(1_000_000, 0.01).approxEqual(0.4973404)); 542 } 543 544 // test multiple 545 version(mir_stat_test) 546 @safe pure nothrow @nogc 547 unittest { 548 import mir.math.common: approxEqual; 549 550 // n = 5 551 for (size_t i; i <= 5; i++) { 552 assert(i.binomialCCDF(5, 0.25).approxEqual(1 - binomialCDF(i, 5, 0.25))); 553 assert(i.binomialCCDF(5, 0.50).approxEqual(1 - binomialCDF(i, 5, 0.50))); 554 assert(i.binomialCCDF(5, 0.75).approxEqual(1 - binomialCDF(i, 5, 0.75))); 555 } 556 557 // n = 6 558 for (size_t i; i <= 6; i++) { 559 assert(i.binomialCCDF(6, 0.25).approxEqual(1 - binomialCDF(i, 6, 0.25))); 560 assert(i.binomialCCDF(6, 0.5).approxEqual(1 - binomialCDF(i, 6, 0.5))); 561 assert(i.binomialCCDF(6, 0.75).approxEqual(1 - binomialCDF(i, 6, 0.75))); 562 } 563 } 564 565 // test approxNormal / approxNormalContinuityCorrection / approxPoisson 566 version(mir_stat_test) 567 @safe pure nothrow @nogc 568 unittest { 569 import mir.math.common: approxEqual; 570 571 for (size_t i; i <= 5; i++) { 572 assert(i.binomialCCDF!"approxNormal"(5, 0.25).approxEqual(1 - binomialCDF!"approxNormal"(i, 5, 0.25))); 573 assert(i.binomialCCDF!"approxNormalContinuityCorrection"(5, 0.25).approxEqual(1 - binomialCDF!"approxNormalContinuityCorrection"(i, 5, 0.25))); 574 assert(i.binomialCCDF!"approxPoisson"(5, 0.25).approxEqual(1 - binomialCDF!"approxPoisson"(i, 5, 0.25))); 575 assert(i.binomialCCDF!("approxPoisson", "direct")(5, 0.25).approxEqual(1 - binomialCDF!("approxPoisson", "direct")(i, 5, 0.25))); 576 } 577 } 578 579 private 580 @safe pure nothrow @nogc 581 size_t binomialInvCDFImpl(T, BinomialAlgo binomialAlgo)(const T q, const size_t n, const T p) 582 if (isFloatingPoint!T && binomialAlgo == BinomialAlgo.direct) 583 in (q >= 0, "q must be greater than or equal to 0") 584 in (q <= 1, "q must be less than or equal to 1") 585 in (p >= 0, "p must be greater than or equal to 0") 586 in (p <= 1, "p must be less than or equal to 1") 587 { 588 if (q == 0) { 589 return 0; 590 } else if (q == 1) { 591 return n; 592 } 593 594 size_t guess = 0; 595 if ((n > 20 && (p > 0.25 && p < 0.75)) || 596 (n * p > 9 * (1 - p) && n * (1 - p) > 9 * p) || 597 (n * p >= 5 && n * (1 - p) >= 5)) { 598 guess = binomialInvCDFImpl!(T, BinomialAlgo.approxNormalContinuityCorrection)(q, n, p); 599 } else if ((n >= 20 && p <= 0.05) || 600 (n >= 100 && n * p <= 10)) { 601 guess = binomialInvCDFImpl!(T, BinomialAlgo.approxPoisson, PoissonAlgo.approxNormalContinuityCorrection)(q, n, p); 602 } 603 T cdfGuess = binomialCDF!(binomialAlgo)(guess, n, p); 604 605 if (q <= cdfGuess) { 606 if (guess == 0) { 607 return guess; 608 } 609 for (size_t i = (guess - 1); guess >= 0; i--) { 610 cdfGuess -= binomialPMF!(binomialAlgo)(i + 1, n, p); 611 if (q > cdfGuess) { 612 guess = i + 1; 613 break; 614 } 615 } 616 } else { 617 while(q > cdfGuess) { 618 guess++; 619 cdfGuess += binomialPMF!(binomialAlgo)(guess, n, p); 620 } 621 } 622 return guess; 623 } 624 625 private 626 @safe pure nothrow @nogc 627 size_t binomialInvCDFImpl(T, BinomialAlgo binomialAlgo)(const T q, const size_t n, const T p) 628 if (isFloatingPoint!T && 629 (binomialAlgo == BinomialAlgo.approxNormal || 630 binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection)) 631 in (q >= 0, "q must be greater than or equal to 0") 632 in (q <= 1, "q must be less than or equal to 1") 633 in (p >= 0, "p must be greater than or equal to 0") 634 in (p <= 1, "p must be less than or equal to 1") 635 { 636 import mir.math.common: floor, sqrt; 637 import mir.stat.distribution.normal: normalCDF, normalInvCDF; 638 639 T mu = n * p; 640 T std = sqrt(mu * (1 - p)); 641 642 // Handles case where q is small or large, better than just using probLowerBound = 0 or probUpperBound = 0 643 T probLowerBound = 0; 644 T probUpperBound = 1; 645 T lowerValue = 0; 646 T upperValue = n; 647 static if (binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection) { 648 lowerValue += 0.5; 649 upperValue -= 0.5; 650 } 651 probLowerBound = normalCDF(lowerValue, mu, std); 652 probUpperBound = normalCDF(upperValue, mu, std); 653 if (q <= probLowerBound) { 654 return 0; 655 } else if (q >= probUpperBound) { 656 return n; 657 } 658 659 auto result = normalInvCDF(q, mu, std); 660 static if (binomialAlgo == BinomialAlgo.approxNormalContinuityCorrection) { 661 result = result - 0.5; 662 } 663 return cast(size_t) floor(result); 664 } 665 666 private 667 @safe pure nothrow @nogc 668 size_t binomialInvCDFImpl(T, BinomialAlgo binomialAlgo, PoissonAlgo poissonAlgo)(const T q, const size_t n, const T p) 669 if (isFloatingPoint!T && 670 binomialAlgo == BinomialAlgo.approxPoisson && poissonAlgo != PoissonAlgo.gamma) 671 in (q >= 0, "q must be greater than or equal to 0") 672 in (q <= 1, "q must be less than or equal to 1") 673 in (p >= 0, "p must be greater than or equal to 0") 674 in (p <= 1, "p must be less than or equal to 1") 675 { 676 import mir.stat.distribution.poisson: poissonInvCDF; 677 678 return poissonInvCDF!poissonAlgo(q, n * p); 679 } 680 681 /++ 682 Computes the binomial inverse cumulative distribution function (InvCDF). 683 684 Additional algorithms may be provided for calculating InvCDF that allow trading off 685 time and accuracy. If `approxPoisson` is provided, the default is 686 `PoissonAlgo.direct`, which is different from `binomialPMF` and `binomialCDF` 687 `PoissonAlgo.gamma` is not supported. 688 689 Params: 690 binomialAlgo = algorithm for calculating CDF (default: BinomialAlgo.direct) 691 poissonAlgo = algorithm for poisson approximation (default: PoissonAlgo.direct) 692 693 See_also: 694 $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution) 695 +/ 696 @safe pure nothrow @nogc 697 template binomialInvCDF(BinomialAlgo binomialAlgo = BinomialAlgo.direct, 698 PoissonAlgo poissonAlgo = PoissonAlgo.direct) 699 if (poissonAlgo != PoissonAlgo.gamma) 700 { 701 /++ 702 Params: 703 q = value to evaluate InvCDF 704 n = number of trials 705 p = `true` probability 706 +/ 707 size_t binomialInvCDF(T)(const T q, const size_t n, const T p) 708 if (isFloatingPoint!T) 709 in (q >= 0, "q must be greater than or equal to 0") 710 in (q <= 1, "q must be less than or equal to 1") 711 in (p >= 0, "p must be greater than or equal to 0") 712 in (p <= 1, "p must be less than or equal to 1") 713 { 714 static if (binomialAlgo != BinomialAlgo.approxPoisson) 715 return binomialInvCDFImpl!(T, binomialAlgo)(q, n, p); 716 else 717 return binomialInvCDFImpl!(T, binomialAlgo, poissonAlgo)(q, n, p); 718 } 719 } 720 721 /// ditto 722 @safe pure nothrow @nogc 723 template binomialInvCDF(string binomialAlgo, string poissonAlgo = "direct") 724 { 725 mixin("alias binomialInvCDF = .binomialInvCDF!(BinomialAlgo." ~ binomialAlgo ~ ", PoissonAlgo." ~ poissonAlgo ~ ");"); 726 } 727 728 /// 729 version(mir_stat_test) 730 @safe pure nothrow @nogc 731 unittest { 732 assert(0.15.binomialInvCDF(6, 2.0 / 3) == 3); 733 // For large values of `n` with `p` not too extreme, can approximate with normal distribution 734 assert(0.5.binomialInvCDF!"approxNormal"(1_000_000, 0.55) == 550_000); 735 // Or closer with continuity correction 736 assert(0.500401.binomialInvCDF!"approxNormalContinuityCorrection"(1_000_000, 0.55) == 550_000); 737 // Poisson approximation is better when `p` is low 738 assert(0.5026596.binomialInvCDF!"approxPoisson"(1_000_000, 0.01) == 10_000); 739 } 740 741 // test BinomialAlgo.direct 742 version(mir_stat_test) 743 @safe pure nothrow @nogc 744 unittest { 745 assert(0.binomialInvCDF(5, 0.6) == 0); 746 assert(1.binomialInvCDF(5, 0.6) == 5); 747 for (double x = 0.05; x < 1; x = x + 0.05) { 748 size_t value = x.binomialInvCDF(5, 0.6); 749 assert(value.binomialCDF(5, 0.6) >= x); 750 assert((value - 1).binomialCDF(5, 0.6) < x); 751 } 752 } 753 754 // test Binomial.direct, alternate guess paths 755 version(mir_stat_test) 756 @safe pure nothrow @nogc 757 unittest { 758 import mir.math.common: approxEqual; 759 760 static immutable int[] ns = [ 25, 37, 34, 25, 25, 105]; 761 static immutable double[] ps = [0.55, 0.2, 0.15, 0.05, 1.0e-8, 0.025]; 762 763 size_t value; 764 for (size_t i; i < 6; i++) { 765 for (double x = 0.05; x < 1; x = x + 0.05) { 766 value = x.binomialInvCDF(ns[i], ps[i]); 767 assert(value.binomialCDF(ns[i], ps[i]) >= x); 768 if (value > 1) 769 assert((value - 1).binomialCDF(ns[i], ps[i]) < x); 770 } 771 } 772 } 773 774 // test Binomial.direct, detailed alternate guess paths 775 version(mir_stat_test_binom_multi) 776 @safe pure nothrow @nogc 777 unittest { 778 import mir.math.common: approxEqual; 779 780 static immutable int[] ns = [ 25, 37, 34, 25, 25, 105]; 781 static immutable double[] ps = [0.55, 0.2, 0.15, 0.05, 1.0e-8, 0.025]; 782 783 size_t value; 784 for (size_t i; i < 6; i++) { 785 for (double x = 0.01; x < 1; x = x + 0.01) { 786 value = x.binomialInvCDF(ns[i], ps[i]); 787 assert(value.binomialCDF(ns[i], ps[i]) >= x); 788 if (value > 1) 789 assert((value - 1).binomialCDF(ns[i], ps[i]) < x); 790 } 791 } 792 } 793 794 // test Binomial.approxNormal / approxNormalContinuityCorrection 795 version(mir_stat_test) 796 @safe pure nothrow @nogc 797 unittest { 798 import mir.math.common: floor, sqrt; 799 import mir.stat.distribution.normal: normalInvCDF; 800 801 assert(0.binomialInvCDF!"approxNormal"(1000, 0.55) == 0); 802 assert(0.binomialInvCDF!"approxNormalContinuityCorrection"(1000, 0.55) == 0); 803 assert(1.binomialInvCDF!"approxNormal"(1000, 0.55) == 1_000); 804 assert(1.binomialInvCDF!"approxNormalContinuityCorrection"(1000, 0.55) == 1_000); 805 double checkValue; 806 for (double x = 0.05; x < 1; x = x + 0.05) { 807 checkValue = normalInvCDF(x, 1_000 * 0.55, sqrt(1000 * 0.55 * 0.45)); 808 assert(x.binomialInvCDF!"approxNormal"(1_000, 0.55) == floor(checkValue)); 809 assert(x.binomialInvCDF!"approxNormalContinuityCorrection"(1_000, 0.55) == floor(checkValue - 0.5)); 810 } 811 } 812 813 // test Binomial.approxPoisson 814 version(mir_stat_test) 815 @safe pure nothrow @nogc 816 unittest { 817 import mir.stat.distribution.poisson: poissonInvCDF; 818 819 assert(0.binomialInvCDF!"approxPoisson"(20, 0.25) == 0); 820 for (double x = 0.05; x < 1; x = x + 0.05) { 821 assert(x.binomialInvCDF!"approxPoisson"(20, 0.25) == poissonInvCDF(x, 20 * 0.25)); 822 assert(x.binomialInvCDF!("approxPoisson", "approxNormalContinuityCorrection")(1_000, 0.55) == poissonInvCDF!"approxNormalContinuityCorrection"(x, 1000 * 0.55)); 823 } 824 } 825 826 /++ 827 Computes the binomial log probability mass function (LPMF) 828 829 Params: 830 k = value to evaluate LPMF (e.g. number of "heads") 831 n = number of trials 832 p = `true` probability 833 834 See_also: 835 $(LINK2 https://en.wikipedia.org/wiki/Binomial_distribution, Binomial Distribution) 836 +/ 837 T binomialLPMF(T)(const size_t k, const size_t n, const T p) 838 if (isFloatingPoint!T) 839 in (k <= n, "k must be less than or equal to n") 840 in (p >= 0, "p must be greater than or equal to 0") 841 in (p <= 1, "p must be less than or equal to 1") 842 { 843 import mir.math.internal.xlogy: xlogy, xlog1py; 844 import mir.math.internal.log_binomial: logBinomialCoefficient; 845 846 return logBinomialCoefficient(n, cast(const uint) k) + xlogy(k, p) + xlog1py((n - k), -p); 847 } 848 849 /// 850 version(mir_stat_test) 851 @safe pure nothrow @nogc 852 unittest { 853 import mir.math.common: approxEqual, exp; 854 855 for (size_t i; i <= 5; i++) { 856 assert(i.binomialLPMF(5, 0.5).exp.approxEqual(binomialPMF(i, 5, 0.5))); 857 assert(i.binomialLPMF(5, 0.75).exp.approxEqual(binomialPMF(i, 5, 0.75))); 858 } 859 } 860 861 /// Accurate values for large values of `n` 862 version(mir_stat_test_fp) 863 @safe pure nothrow @nogc 864 unittest { 865 import mir.bignum.fp: Fp, fp_log; 866 import mir.math.common: approxEqual; 867 868 assert(1.binomialLPMF(1_000_000, 0.75).approxEqual(fp_binomialPMF(1, 1_000_000, Fp!128(0.75)).fp_log!double)); 869 } 870 871 // more values to test 872 version(mir_stat_test_fp) 873 @safe pure nothrow @nogc 874 unittest { 875 import mir.bignum.fp: Fp, fp_log; 876 import mir.math.common: approxEqual; 877 878 enum size_t val = 1_000_000; 879 880 assert(0.binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(0, val + 5, Fp!128(0.75)).fp_log!double)); 881 assert(1.binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(1, val + 5, Fp!128(0.75)).fp_log!double)); 882 assert(2.binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(2, val + 5, Fp!128(0.75)).fp_log!double)); 883 assert(5.binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(5, val + 5, Fp!128(0.75)).fp_log!double)); 884 assert((val / 2).binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(val / 2, val + 5, Fp!128(0.75)).fp_log!double)); 885 assert((val - 5).binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(val - 5, val + 5, Fp!128(0.75)).fp_log!double)); 886 assert((val - 2).binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(val - 2, val + 5, Fp!128(0.75)).fp_log!double)); 887 assert((val - 1).binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(val - 1, val + 5, Fp!128(0.75)).fp_log!double)); 888 assert((val - 0).binomialLPMF(val + 5, 0.75).approxEqual(fp_binomialPMF(val, val + 5, Fp!128(0.75)).fp_log!double)); 889 }