1 /++ 2 This module contains algorithms for the $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution). 3 4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 5 6 Authors: John Michael Hall 7 8 Copyright: 2023 Mir Stat Authors. 9 10 +/ 11 12 module mir.stat.distribution.categorical; 13 14 import mir.algorithm.iteration: all; 15 import mir.internal.utility: isFloatingPoint; 16 import mir.math.common: approxEqual; 17 import mir.math.sum: elementType, sumType, sum; 18 import mir.ndslice.slice: Slice, SliceKind; 19 import std.traits: CommonType; 20 21 /++ 22 Computes the Categorical probability mass function (PMF). 23 24 Params: 25 x = value to evaluate PMF 26 p = slice containing the probability associated with the Categorical Distribution 27 28 See_also: 29 $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution) 30 +/ 31 @safe pure nothrow @nogc 32 elementType!(Slice!(Iterator, 1, kind)) categoricalPMF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p) 33 if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind)))) 34 in (x < p.length, "x must be less than the length of p") 35 in (p.sum.approxEqual(1.0), "p must sum to 1") 36 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 37 in (p.all!("a <= 1"), "p must be less than or equal to 1") 38 { 39 return p[x]; 40 } 41 42 /// ditto 43 @safe pure nothrow @nogc 44 T categoricalPMF(T)(const size_t x, scope const T[] p...) 45 if (isFloatingPoint!T) 46 in (x < p.length, "x must be less than the length of p") 47 in (p.sum.approxEqual(1.0), "p must sum to 1") 48 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 49 in (p.all!("a <= 1"), "p must be less than or equal to 1") 50 { 51 return p[x]; 52 } 53 54 /// 55 version(mir_stat_test) 56 @safe pure nothrow @nogc 57 unittest 58 { 59 import mir.ndslice.slice: sliced; 60 import mir.test: shouldApprox; 61 62 static immutable x = [0.1, 0.5, 0.4]; 63 auto p = x.sliced; 64 65 0.categoricalPMF(p).shouldApprox == 0.1; 66 1.categoricalPMF(p).shouldApprox == 0.5; 67 2.categoricalPMF(p).shouldApprox == 0.4; 68 } 69 70 /// Can also use dynamic array 71 version(mir_stat_test) 72 @safe pure nothrow 73 unittest 74 { 75 import mir.test: shouldApprox; 76 77 double[] p = [0.1, 0.5, 0.4]; 78 79 0.categoricalPMF(p).shouldApprox == 0.1; 80 1.categoricalPMF(p).shouldApprox == 0.5; 81 2.categoricalPMF(p).shouldApprox == 0.4; 82 } 83 84 /// 85 version(mir_stat_test) 86 @safe pure nothrow @nogc 87 unittest 88 { 89 import mir.math.sum: sum; 90 import mir.ndslice.slice: sliced; 91 import mir.test: shouldApprox; 92 93 static immutable x = [1.0, 5, 4]; 94 auto p = x.sliced; 95 auto q = p / sum(p); 96 97 0.categoricalPMF(q).shouldApprox == 0.1; 98 1.categoricalPMF(q).shouldApprox == 0.5; 99 2.categoricalPMF(q).shouldApprox == 0.4; 100 } 101 102 /++ 103 Computes the Categorical cumulative distribution function (CDF). 104 105 Params: 106 x = value to evaluate CDF 107 p = slice containing the probability associated with the Categorical Distribution 108 109 See_also: 110 $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution) 111 +/ 112 @safe pure nothrow @nogc 113 sumType!(Slice!(Iterator, 1, kind)) categoricalCDF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p) 114 if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind)))) 115 in (x < p.length, "x must be less than the length of p") 116 in (p.sum.approxEqual(1.0), "p must sum to 1") 117 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 118 in (p.all!("a <= 1"), "p must be less than or equal to 1") 119 { 120 return p[0 .. (x + 1)].sum; 121 } 122 123 /// ditto 124 @safe pure nothrow @nogc 125 T categoricalCDF(T)(const size_t x, scope const T[] p...) 126 if (isFloatingPoint!T) 127 in (x < p.length, "x must be less than the length of p") 128 in (p.sum.approxEqual(1.0), "p must sum to 1") 129 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 130 in (p.all!("a <= 1"), "p must be less than or equal to 1") 131 { 132 return p[0 .. (x + 1)].sum; 133 } 134 135 /// 136 version(mir_stat_test) 137 @safe pure nothrow @nogc 138 unittest 139 { 140 import mir.ndslice.slice: sliced; 141 import mir.test: shouldApprox; 142 143 static immutable x = [0.1, 0.5, 0.4]; 144 auto p = x.sliced; 145 146 0.categoricalCDF(p).shouldApprox == 0.1; 147 1.categoricalCDF(p).shouldApprox == 0.6; 148 2.categoricalCDF(p).shouldApprox == 1.0; 149 } 150 151 /// Can also use dynamic array 152 version(mir_stat_test) 153 @safe pure nothrow 154 unittest 155 { 156 import mir.test: shouldApprox; 157 158 double[] p = [0.1, 0.5, 0.4]; 159 160 0.categoricalCDF(p).shouldApprox == 0.1; 161 1.categoricalCDF(p).shouldApprox == 0.6; 162 2.categoricalCDF(p).shouldApprox == 1.0; 163 } 164 165 /++ 166 Computes the Categorical complementary cumulative distribution function (CCDF). 167 168 Params: 169 x = value to evaluate CCDF 170 p = slice containing the probability associated with the Categorical Distribution 171 172 See_also: 173 $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution) 174 +/ 175 @safe pure nothrow @nogc 176 sumType!(Slice!(Iterator, 1, kind)) categoricalCCDF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p) 177 if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind)))) 178 in (x < p.length, "x must be less than the length of p") 179 in (p.sum.approxEqual(1.0), "p must sum to 1") 180 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 181 in (p.all!("a <= 1"), "p must be less than or equal to 1") 182 { 183 return p[x .. $].sum; 184 } 185 186 /// ditto 187 @safe pure nothrow @nogc 188 T categoricalCCDF(T)(const size_t x, scope const T[] p...) 189 if (isFloatingPoint!T) 190 in (x < p.length, "x must be less than the length of p") 191 in (p.sum.approxEqual(1.0), "p must sum to 1") 192 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 193 in (p.all!("a <= 1"), "p must be less than or equal to 1") 194 { 195 return p[x .. $].sum; 196 } 197 198 /// 199 version(mir_stat_test) 200 @safe pure nothrow @nogc 201 unittest 202 { 203 import mir.ndslice.slice: sliced; 204 import mir.test: shouldApprox; 205 206 static immutable x = [0.1, 0.5, 0.4]; 207 auto p = x.sliced; 208 209 0.categoricalCCDF(p).shouldApprox == 1.0; 210 1.categoricalCCDF(p).shouldApprox == 0.9; 211 2.categoricalCCDF(p).shouldApprox == 0.4; 212 } 213 214 /// Can also use dynamic array 215 version(mir_stat_test) 216 @safe pure nothrow 217 unittest 218 { 219 import mir.test: shouldApprox; 220 221 double[] p = [0.1, 0.5, 0.4]; 222 223 0.categoricalCCDF(p).shouldApprox == 1.0; 224 1.categoricalCCDF(p).shouldApprox == 0.9; 225 2.categoricalCCDF(p).shouldApprox == 0.4; 226 } 227 228 /++ 229 Computes the Categorical inverse cumulative distribution function (InvCDF). 230 231 Params: 232 q = value to evaluate InvCDF 233 p = slice containing the probability associated with the Categorical Distribution 234 235 See_also: 236 $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution) 237 +/ 238 @safe pure nothrow @nogc 239 size_t categoricalInvCDF(T, Iterator, SliceKind kind)(const T q, scope const Slice!(Iterator, 1, kind) p) 240 if (isFloatingPoint!(CommonType!(T, elementType!(Slice!(Iterator, 1, kind))))) 241 in (q >= 0, "q must be greater than or equal to 0") 242 in (q <= 1, "q must be less than or equal to 1") 243 in (p.sum.approxEqual(1.0), "p must sum to 1") 244 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 245 in (p.all!("a <= 1"), "p must be less than or equal to 1") 246 { 247 CommonType!(T, elementType!(typeof(p))) s = 0.0; 248 size_t i; 249 s += p[i]; 250 while (q > s) { 251 i++; 252 s += p[i]; 253 } 254 return i;// this ensures categoricalInvCDF(a, p) == b, which is consistent with categoricalCDF(b, p) == a (similar to bernoulliInvCDF) 255 } 256 257 /// ditto 258 @safe pure nothrow @nogc 259 size_t categoricalInvCDF(T)(const T q, scope const T[] p...) 260 if (isFloatingPoint!T) 261 in (q >= 0, "q must be greater than or equal to 0") 262 in (q <= 1, "q must be less than or equal to 1") 263 in (p.sum.approxEqual(1.0), "p must sum to 1") 264 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 265 in (p.all!("a <= 1"), "p must be less than or equal to 1") 266 { 267 T s = 0.0; 268 size_t i; 269 s += p[i]; 270 while (q > s) { 271 i++; 272 s += p[i]; 273 } 274 return i;// this ensures categoricalInvCDF(a, p) == b, which is consistent with categoricalCDF(b, p) == a (similar to bernoulliInvCDF) 275 } 276 277 /// 278 version(mir_stat_test) 279 @safe pure nothrow @nogc 280 unittest 281 { 282 import mir.ndslice.slice: sliced; 283 import mir.test: should; 284 285 static immutable x = [0.1, 0.5, 0.4]; 286 auto p = x.sliced; 287 288 categoricalInvCDF(0.0, p).should == 0; 289 categoricalInvCDF(0.1, p).should == 0; 290 categoricalInvCDF(0.2, p).should == 1; 291 categoricalInvCDF(0.3, p).should == 1; 292 categoricalInvCDF(0.4, p).should == 1; 293 categoricalInvCDF(0.5, p).should == 1; 294 categoricalInvCDF(0.6, p).should == 1; 295 categoricalInvCDF(0.7, p).should == 2; 296 categoricalInvCDF(0.8, p).should == 2; 297 categoricalInvCDF(0.9, p).should == 2; 298 categoricalInvCDF(1.0, p).should == 2; 299 } 300 301 /// Can also use dynamic array 302 version(mir_stat_test) 303 @safe pure nothrow 304 unittest 305 { 306 import mir.test: should; 307 308 double[] p = [0.1, 0.5, 0.4]; 309 310 categoricalInvCDF(0.5, p).should == 1; 311 } 312 313 /++ 314 Computes the Categorical log probability mass function (LPMF). 315 316 Params: 317 x = value to evaluate LPMF 318 p = slice containing the probability associated with the Categorical Distribution 319 320 See_also: 321 $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution) 322 +/ 323 @safe pure nothrow @nogc 324 elementType!(Slice!(Iterator, 1, kind)) categoricalLPMF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p) 325 if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind)))) 326 in (x < p.length, "x must be less than the length of p") 327 in (p.sum.approxEqual(1.0), "p must sum to 1") 328 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 329 in (p.all!("a <= 1"), "p must be less than or equal to 1") 330 { 331 import mir.math.common: log; 332 return x.categoricalPMF(p).log; 333 } 334 335 /// ditto 336 @safe pure nothrow @nogc 337 T categoricalLPMF(T)(const size_t x, scope const T[] p...) 338 if (isFloatingPoint!T) 339 in (x < p.length, "x must be less than the length of p") 340 in (p.sum.approxEqual(1.0), "p must sum to 1") 341 in (p.all!("a >= 0"), "p must be greater than or equal to 0") 342 in (p.all!("a <= 1"), "p must be less than or equal to 1") 343 { 344 import mir.math.common: log; 345 return x.categoricalPMF(p).log; 346 } 347 348 /// 349 version(mir_stat_test) 350 @safe pure nothrow @nogc 351 unittest 352 { 353 import mir.math.common: log; 354 import mir.ndslice.slice: sliced; 355 import mir.test: shouldApprox; 356 357 static immutable x = [0.1, 0.5, 0.4]; 358 auto p = x.sliced; 359 360 0.categoricalLPMF(p).shouldApprox == log(0.1); 361 1.categoricalLPMF(p).shouldApprox == log(0.5); 362 2.categoricalLPMF(p).shouldApprox == log(0.4); 363 } 364 365 /// Can also use dynamic array 366 version(mir_stat_test) 367 @safe pure nothrow 368 unittest 369 { 370 import mir.math.common: log; 371 import mir.test: shouldApprox; 372 373 double[] p = [0.1, 0.5, 0.4]; 374 375 0.categoricalLPMF(p).shouldApprox == log(0.1); 376 1.categoricalLPMF(p).shouldApprox == log(0.5); 377 2.categoricalLPMF(p).shouldApprox == log(0.4); 378 }