The OpenD Programming Language

1 /++
2 This module contains algorithms for the $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution).
3 
4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
5 
6 Authors: John Michael Hall
7 
8 Copyright: 2023 Mir Stat Authors.
9 
10 +/
11 
12 module mir.stat.distribution.categorical;
13 
14 import mir.algorithm.iteration: all;
15 import mir.internal.utility: isFloatingPoint;
16 import mir.math.common: approxEqual;
17 import mir.math.sum: elementType, sumType, sum;
18 import mir.ndslice.slice: Slice, SliceKind;
19 import std.traits: CommonType;
20 
21 /++
22 Computes the Categorical probability mass function (PMF).
23 
24 Params:
25     x = value to evaluate PMF
26     p = slice containing the probability associated with the Categorical Distribution
27 
28 See_also:
29     $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution)
30 +/
31 @safe pure nothrow @nogc
32 elementType!(Slice!(Iterator, 1, kind)) categoricalPMF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p)
33     if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind))))
34     in (x < p.length, "x must be less than the length of p")
35     in (p.sum.approxEqual(1.0), "p must sum to 1")
36     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
37     in (p.all!("a <= 1"), "p must be less than or equal to 1")
38 {
39     return p[x];
40 }
41 
42 /// ditto
43 @safe pure nothrow @nogc
44 T categoricalPMF(T)(const size_t x, scope const T[] p...)
45     if (isFloatingPoint!T)
46     in (x < p.length, "x must be less than the length of p")
47     in (p.sum.approxEqual(1.0), "p must sum to 1")
48     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
49     in (p.all!("a <= 1"), "p must be less than or equal to 1")
50 {
51     return p[x];
52 }
53 
54 ///
55 version(mir_stat_test)
56 @safe pure nothrow @nogc
57 unittest
58 {
59     import mir.ndslice.slice: sliced;
60     import mir.test: shouldApprox;
61 
62     static immutable x = [0.1, 0.5, 0.4];
63     auto p = x.sliced;
64 
65     0.categoricalPMF(p).shouldApprox == 0.1;
66     1.categoricalPMF(p).shouldApprox == 0.5;
67     2.categoricalPMF(p).shouldApprox == 0.4;
68 }
69 
70 /// Can also use dynamic array
71 version(mir_stat_test)
72 @safe pure nothrow
73 unittest
74 {
75     import mir.test: shouldApprox;
76 
77     double[] p = [0.1, 0.5, 0.4];
78 
79     0.categoricalPMF(p).shouldApprox == 0.1;
80     1.categoricalPMF(p).shouldApprox == 0.5;
81     2.categoricalPMF(p).shouldApprox == 0.4;
82 }
83 
84 ///
85 version(mir_stat_test)
86 @safe pure nothrow @nogc
87 unittest
88 {
89     import mir.math.sum: sum;
90     import mir.ndslice.slice: sliced;
91     import mir.test: shouldApprox;
92 
93     static immutable x = [1.0, 5, 4];
94     auto p = x.sliced;
95     auto q = p / sum(p);
96 
97     0.categoricalPMF(q).shouldApprox == 0.1;
98     1.categoricalPMF(q).shouldApprox == 0.5;
99     2.categoricalPMF(q).shouldApprox == 0.4;
100 }
101 
102 /++
103 Computes the Categorical cumulative distribution function (CDF).
104 
105 Params:
106     x = value to evaluate CDF
107     p = slice containing the probability associated with the Categorical Distribution
108 
109 See_also:
110     $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution)
111 +/
112 @safe pure nothrow @nogc
113 sumType!(Slice!(Iterator, 1, kind)) categoricalCDF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p)
114     if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind))))
115     in (x < p.length, "x must be less than the length of p")
116     in (p.sum.approxEqual(1.0), "p must sum to 1")
117     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
118     in (p.all!("a <= 1"), "p must be less than or equal to 1")
119 {
120     return p[0 .. (x + 1)].sum;
121 }
122 
123 /// ditto
124 @safe pure nothrow @nogc
125 T categoricalCDF(T)(const size_t x, scope const T[] p...)
126     if (isFloatingPoint!T)
127     in (x < p.length, "x must be less than the length of p")
128     in (p.sum.approxEqual(1.0), "p must sum to 1")
129     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
130     in (p.all!("a <= 1"), "p must be less than or equal to 1")
131 {
132     return p[0 .. (x + 1)].sum;
133 }
134 
135 ///
136 version(mir_stat_test)
137 @safe pure nothrow @nogc
138 unittest
139 {
140     import mir.ndslice.slice: sliced;
141     import mir.test: shouldApprox;
142 
143     static immutable x = [0.1, 0.5, 0.4];
144     auto p = x.sliced;
145 
146     0.categoricalCDF(p).shouldApprox == 0.1;
147     1.categoricalCDF(p).shouldApprox == 0.6;
148     2.categoricalCDF(p).shouldApprox == 1.0;
149 }
150 
151 /// Can also use dynamic array
152 version(mir_stat_test)
153 @safe pure nothrow
154 unittest
155 {
156     import mir.test: shouldApprox;
157 
158     double[] p = [0.1, 0.5, 0.4];
159 
160     0.categoricalCDF(p).shouldApprox == 0.1;
161     1.categoricalCDF(p).shouldApprox == 0.6;
162     2.categoricalCDF(p).shouldApprox == 1.0;
163 }
164 
165 /++
166 Computes the Categorical complementary cumulative distribution function (CCDF).
167 
168 Params:
169     x = value to evaluate CCDF
170     p = slice containing the probability associated with the Categorical Distribution
171 
172 See_also:
173     $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution)
174 +/
175 @safe pure nothrow @nogc
176 sumType!(Slice!(Iterator, 1, kind)) categoricalCCDF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p)
177     if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind))))
178     in (x < p.length, "x must be less than the length of p")
179     in (p.sum.approxEqual(1.0), "p must sum to 1")
180     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
181     in (p.all!("a <= 1"), "p must be less than or equal to 1")
182 {
183     return p[x .. $].sum;
184 }
185 
186 /// ditto
187 @safe pure nothrow @nogc
188 T categoricalCCDF(T)(const size_t x, scope const T[] p...)
189     if (isFloatingPoint!T)
190     in (x < p.length, "x must be less than the length of p")
191     in (p.sum.approxEqual(1.0), "p must sum to 1")
192     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
193     in (p.all!("a <= 1"), "p must be less than or equal to 1")
194 {
195     return p[x .. $].sum;
196 }
197 
198 ///
199 version(mir_stat_test)
200 @safe pure nothrow @nogc
201 unittest
202 {
203     import mir.ndslice.slice: sliced;
204     import mir.test: shouldApprox;
205 
206     static immutable x = [0.1, 0.5, 0.4];
207     auto p = x.sliced;
208 
209     0.categoricalCCDF(p).shouldApprox == 1.0;
210     1.categoricalCCDF(p).shouldApprox == 0.9;
211     2.categoricalCCDF(p).shouldApprox == 0.4;
212 }
213 
214 /// Can also use dynamic array
215 version(mir_stat_test)
216 @safe pure nothrow
217 unittest
218 {
219     import mir.test: shouldApprox;
220 
221     double[] p = [0.1, 0.5, 0.4];
222 
223     0.categoricalCCDF(p).shouldApprox == 1.0;
224     1.categoricalCCDF(p).shouldApprox == 0.9;
225     2.categoricalCCDF(p).shouldApprox == 0.4;
226 }
227 
228 /++
229 Computes the Categorical inverse cumulative distribution function (InvCDF).
230 
231 Params:
232     q = value to evaluate InvCDF
233     p = slice containing the probability associated with the Categorical Distribution
234 
235 See_also:
236     $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution)
237 +/
238 @safe pure nothrow @nogc
239 size_t categoricalInvCDF(T, Iterator, SliceKind kind)(const T q, scope const Slice!(Iterator, 1, kind) p)
240     if (isFloatingPoint!(CommonType!(T, elementType!(Slice!(Iterator, 1, kind)))))
241     in (q >= 0, "q must be greater than or equal to 0")
242     in (q <= 1, "q must be less than or equal to 1")
243     in (p.sum.approxEqual(1.0), "p must sum to 1")
244     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
245     in (p.all!("a <= 1"), "p must be less than or equal to 1")
246 {
247     CommonType!(T, elementType!(typeof(p))) s = 0.0;
248     size_t i;
249     s += p[i];
250     while (q > s) {
251         i++;
252         s += p[i];
253     }
254     return i;// this ensures categoricalInvCDF(a, p) == b, which is consistent with categoricalCDF(b, p) == a (similar to bernoulliInvCDF)
255 }
256 
257 /// ditto
258 @safe pure nothrow @nogc
259 size_t categoricalInvCDF(T)(const T q, scope const T[] p...)
260     if (isFloatingPoint!T)
261     in (q >= 0, "q must be greater than or equal to 0")
262     in (q <= 1, "q must be less than or equal to 1")
263     in (p.sum.approxEqual(1.0), "p must sum to 1")
264     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
265     in (p.all!("a <= 1"), "p must be less than or equal to 1")
266 {
267     T s = 0.0;
268     size_t i;
269     s += p[i];
270     while (q > s) {
271         i++;
272         s += p[i];
273     }
274     return i;// this ensures categoricalInvCDF(a, p) == b, which is consistent with categoricalCDF(b, p) == a (similar to bernoulliInvCDF)
275 }
276 
277 ///
278 version(mir_stat_test)
279 @safe pure nothrow @nogc
280 unittest
281 {
282     import mir.ndslice.slice: sliced;
283     import mir.test: should;
284 
285     static immutable x = [0.1, 0.5, 0.4];
286     auto p = x.sliced;
287 
288     categoricalInvCDF(0.0, p).should == 0;
289     categoricalInvCDF(0.1, p).should == 0;
290     categoricalInvCDF(0.2, p).should == 1;
291     categoricalInvCDF(0.3, p).should == 1;
292     categoricalInvCDF(0.4, p).should == 1;
293     categoricalInvCDF(0.5, p).should == 1;
294     categoricalInvCDF(0.6, p).should == 1;
295     categoricalInvCDF(0.7, p).should == 2;
296     categoricalInvCDF(0.8, p).should == 2;
297     categoricalInvCDF(0.9, p).should == 2;
298     categoricalInvCDF(1.0, p).should == 2;
299 }
300 
301 /// Can also use dynamic array
302 version(mir_stat_test)
303 @safe pure nothrow
304 unittest
305 {
306     import mir.test: should;
307 
308     double[] p = [0.1, 0.5, 0.4];
309 
310     categoricalInvCDF(0.5, p).should == 1;
311 }
312 
313 /++
314 Computes the Categorical log probability mass function (LPMF).
315 
316 Params:
317     x = value to evaluate LPMF
318     p = slice containing the probability associated with the Categorical Distribution
319 
320 See_also:
321     $(LINK2 https://en.wikipedia.org/wiki/Categorical_distribution, Categorical Distribution)
322 +/
323 @safe pure nothrow @nogc
324 elementType!(Slice!(Iterator, 1, kind)) categoricalLPMF(Iterator, SliceKind kind)(const size_t x, scope const Slice!(Iterator, 1, kind) p)
325     if (isFloatingPoint!(elementType!(Slice!(Iterator, 1, kind))))
326     in (x < p.length, "x must be less than the length of p")
327     in (p.sum.approxEqual(1.0), "p must sum to 1")
328     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
329     in (p.all!("a <= 1"), "p must be less than or equal to 1")
330 {
331     import mir.math.common: log;
332     return x.categoricalPMF(p).log;
333 }
334 
335 /// ditto
336 @safe pure nothrow @nogc
337 T categoricalLPMF(T)(const size_t x, scope const T[] p...)
338     if (isFloatingPoint!T)
339     in (x < p.length, "x must be less than the length of p")
340     in (p.sum.approxEqual(1.0), "p must sum to 1")
341     in (p.all!("a >= 0"), "p must be greater than or equal to 0")
342     in (p.all!("a <= 1"), "p must be less than or equal to 1")
343 {
344     import mir.math.common: log;
345     return x.categoricalPMF(p).log;
346 }
347 
348 ///
349 version(mir_stat_test)
350 @safe pure nothrow @nogc
351 unittest
352 {
353     import mir.math.common: log;
354     import mir.ndslice.slice: sliced;
355     import mir.test: shouldApprox;
356 
357     static immutable x = [0.1, 0.5, 0.4];
358     auto p = x.sliced;
359 
360     0.categoricalLPMF(p).shouldApprox == log(0.1);
361     1.categoricalLPMF(p).shouldApprox == log(0.5);
362     2.categoricalLPMF(p).shouldApprox == log(0.4);
363 }
364 
365 /// Can also use dynamic array
366 version(mir_stat_test)
367 @safe pure nothrow
368 unittest
369 {
370     import mir.math.common: log;
371     import mir.test: shouldApprox;
372 
373     double[] p = [0.1, 0.5, 0.4];
374 
375     0.categoricalLPMF(p).shouldApprox == log(0.1);
376     1.categoricalLPMF(p).shouldApprox == log(0.5);
377     2.categoricalLPMF(p).shouldApprox == log(0.4);
378 }