mir.stat.transform source code

1 /++
2 This module contains algorithms for transforming data that are useful in
3 statistical applications.
4 
5 $(SCRIPT inhibitQuickIndex = 1;)
6 $(DIVC quickindex,
7 $(BOOKTABLE ,
8 $(TR $(TH Function Name) $(TH Description)
9 )
10     $(TR $(TD $(LREF center))
11         $(TD Subtracts the mean (or using some other function) from each element
12         of a slice.
13     ))
14     $(TR $(TD $(LREF robustScale))
15         $(TD Subtracts the median and divides by the difference between a lower
16         and upper quantile from each element of a slice.
17     ))
18     $(TR $(TD $(LREF scale))
19         $(TD Subtracts the mean (or using some other function) and divides by
20         the standard deviation (or using some other function) from each element
21         of a slice.
22     ))
23     $(TR $(TD $(LREF sweep))
24         $(TD Applies a function and an operation to each element of a slice.
25     ))
26     $(TR $(TD $(LREF zscore))
27         $(TD Subtracts the mean and divides by the standard deviation from each
28         element of a slice.
29     ))
30 ))
31 
32 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
33 
34 The $(LREF center) function is borrowed from $(HTTP mir-algorithm.$(MIR_SITE)/mir_math_stat.html, mir.math.stat).
35 
36 Authors: John Michael Hall, Ilya Yaroshenko
37 
38 Copyright: 2022-3 Mir Stat Authors.
39 
40 Macros:
41 SUBREF = $(REF_ALTTEXT $(TT $2), $2, mir, stat, $1)$(NBSP)
42 SUB2REF = $(REF_ALTTEXT $(TT $2), $2, mir, stat, descriptive, $1)$(NBSP)
43 MATHREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, math, $1)$(NBSP)
44 T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
45 T4=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3) $(TD $4))
46 
47 +/
48 
49 module mir.stat.transform;
50 
51 import mir.math.common: fmamath;
52 import mir.math.sum: Summation;
53 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind;
54 import mir.stat.descriptive.univariate: mean, QuantileAlgo, standardDeviation, VarianceAlgo;
55 
56 /++
57 Centers `slice`, which must be a finite iterable.
58 
59 By default, `slice` is centered by the mean. A custom function may also be
60 provided using `centralTendency`.
61 
62 Returns:
63     The elements in the slice with the average subtracted from them.
64 
65 See_also:
66     $(SUB2REF univariate, mean)
67 +/
68 template center(alias centralTendency = mean!(Summation.appropriate))
69 {
70     import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind;
71     /++
72     Params:
73         slice = slice
74     +/
75     auto center(Iterator, size_t N, SliceKind kind)(
76         Slice!(Iterator, N, kind) slice)
77     {
78         import core.lifetime: move;
79         import mir.ndslice.internal: LeftOp, ImplicitlyUnqual;
80         import mir.ndslice.topology: vmap;
81 
82         auto m = centralTendency(slice.lightScope);
83         alias T = typeof(m);
84         return slice.move.vmap(LeftOp!("-", ImplicitlyUnqual!T)(m));
85     }
86     
87     /// ditto
88     auto center(T)(T x)
89         if (isConvertibleToSlice!T && !isSlice!T)
90     {
91         import mir.ndslice.slice: toSlice;
92         return center(x.toSlice);
93     }
94 }
95 
96 /// Center vector
97 version(mir_stat_test)
98 @safe pure nothrow
99 unittest
100 {
101     import mir.algorithm.iteration: all;
102     import mir.math.common: approxEqual;
103     import mir.ndslice.slice: sliced;
104     import mir.stat.descriptive.univariate: gmean, hmean, median;
105 
106     auto x = [1.0, 2, 3, 4, 5, 6].sliced;
107     assert(x.center.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
108     
109     // Can center using different functions
110     assert(x.center!hmean.all!approxEqual([-1.44898, -0.44898, 0.55102, 1.55102, 2.55102, 3.55102]));
111     assert(x.center!gmean.all!approxEqual([-1.99379516, -0.99379516, 0.00620483, 1.00620483, 2.00620483, 3.00620483]));
112     assert(x.center!median.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
113 
114     // center operates lazily, if original slice is changed, then 
115     auto y = x.center;
116     assert(y.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
117     x[0]++;
118     assert(y.all!approxEqual([-1.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
119 }
120 
121 /// Example of lazy behavior of center
122 version(mir_stat_test)
123 @safe pure nothrow
124 unittest
125 {
126     import mir.algorithm.iteration: all;
127     import mir.math.common: approxEqual;
128     import mir.ndslice.allocation: slice;
129     import mir.ndslice.slice: sliced;
130 
131     auto x = [1.0, 2, 3, 4, 5, 6].sliced;
132     auto y = x.center;
133     auto z = x.center.slice;
134     assert(y.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
135     x[0]++;
136     // y changes, while z does not
137     assert(y.all!approxEqual([-1.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
138     assert(z.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));   
139 }
140 
141 /// Center dynamic array
142 version(mir_stat_test)
143 @safe pure nothrow
144 unittest
145 {
146     import mir.algorithm.iteration: all;
147     import mir.math.common: approxEqual;
148 
149     auto x = [1.0, 2, 3, 4, 5, 6];
150     assert(x.center.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
151 }
152 
153 /// Center matrix
154 version(mir_stat_test)
155 @safe pure
156 unittest
157 {
158     import mir.algorithm.iteration: all;
159     import mir.math.common: approxEqual;
160     import mir.ndslice: fuse;
161     
162     auto x = [
163         [0.0, 1, 2], 
164         [3.0, 4, 5]
165     ].fuse;
166     
167     auto y = [
168         [-2.5, -1.5, -0.5], 
169         [ 0.5,  1.5,  2.5]
170     ].fuse;
171     
172     assert(x.center.all!approxEqual(y));
173 }
174 
175 /// Column center matrix
176 version(mir_stat_test)
177 @safe pure
178 unittest
179 {
180     import mir.algorithm.iteration: all, equal;
181     import mir.math.common: approxEqual;
182     import mir.ndslice: fuse;
183     import mir.ndslice.topology: alongDim, byDim, map;
184 
185     auto x = [
186         [20.0, 100.0, 2000.0],
187         [10.0,   5.0,    2.0]
188     ].fuse;
189 
190     auto result = [
191         [ 5.0,  47.5,  999],
192         [-5.0, -47.5, -999]
193     ].fuse;
194 
195     // Use byDim with map to compute average of row/column.
196     auto xCenterByDim = x.byDim!1.map!center;
197     auto resultByDim = result.byDim!1;
198     assert(xCenterByDim.equal!(equal!approxEqual)(resultByDim));
199 
200     auto xCenterAlongDim = x.alongDim!0.map!center;
201     auto resultAlongDim = result.alongDim!0;
202     assert(xCenterByDim.equal!(equal!approxEqual)(resultAlongDim));
203 }
204 
205 /// Can also pass arguments to average function used by center
206 version(mir_stat_test)
207 pure @safe nothrow
208 unittest
209 {
210     import mir.ndslice.slice: sliced;
211     import mir.stat.descriptive.univariate: mean;
212 
213     //Set sum algorithm or output type
214     auto a = [1, 1e100, 1, -1e100];
215 
216     auto x = a.sliced * 10_000;
217 
218     //Due to Floating Point precision, subtracting the mean from the second
219     //and fourth numbers in `x` does not change the value of the result
220     auto result = [5000, 1e104, 5000, -1e104].sliced;
221 
222     assert(x.center!(mean!"kbn") == result);
223     assert(x.center!(mean!"kb2") == result);
224     assert(x.center!(mean!"precise") == result);
225 }
226 
227 /++
228 Passing a centered input to `variance` or `standardDeviation` with the
229 `assumeZeroMean` algorithm is equivalent to calculating `variance` or
230 `standardDeviation` on the original input.
231 +/
232 version(mir_stat_test)
233 @safe pure nothrow
234 unittest
235 {
236     import mir.algorithm.iteration: all;
237     import mir.math.common: approxEqual;
238     import mir.ndslice.slice: sliced;
239     import mir.stat.descriptive.univariate: standardDeviation, variance;
240 
241     auto x = [1.0, 2, 3, 4, 5, 6].sliced;
242     assert(x.center.variance!"assumeZeroMean".approxEqual(x.variance));
243     assert(x.center.standardDeviation!"assumeZeroMean".approxEqual(x.standardDeviation));
244 }
245 
246 // dynamic array test
247 version(mir_stat_test)
248 @safe pure nothrow
249 unittest
250 {
251     import mir.algorithm.iteration: all;
252     import mir.math.common: approxEqual;
253 
254     double[] x = [1.0, 2, 3, 4, 5, 6];
255 
256     assert(x.center.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
257 }
258 
259 // withAsSlice test
260 version(mir_stat_test)
261 @safe pure nothrow @nogc
262 unittest
263 {
264     import mir.algorithm.iteration: all;
265     import mir.math.common: approxEqual;
266     import mir.rc.array: RCArray;
267 
268     static immutable a = [1.0, 2, 3, 4, 5, 6];
269     static immutable result = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5];
270 
271     auto x = RCArray!double(6);
272     foreach(i, ref e; x)
273         e = a[i];
274 
275     assert(x.center.all!approxEqual(result));
276 }
277 
278 /++
279 For each `e` of the input, applies `e op m` where `m` is the result of `fun` and
280 `op` is an operation, such as `"+"`, `"-"`, `"*"`, or `"/"`. For instance, if
281 `op = "-"`, then this function computes `e - m` for each `e` of the input and
282 where `m` is the result of applying `fun` to the input.
283 Overloads are provided to directly provide `m` to the function, rather than
284 calculate it using `fun`.
285 
286 Params:
287     fun = function used to sweep
288     op = operation
289 Returns:
290     The input 
291 See_also:
292     $(LREF center),
293     $(LREF scale)
294 +/
295 template sweep(alias fun, string op)
296 {
297     /++
298     Params:
299         slice = slice
300     +/
301     @fmamath auto sweep(Iterator, size_t N, SliceKind kind)(
302         Slice!(Iterator, N, kind) slice)
303     {
304         import core.lifetime: move;
305 
306         auto m = fun(slice.lightScope);
307         return .sweep!op(slice.move, m);
308     }
309 
310     /// ditto
311     @fmamath auto sweep(SliceLike)(SliceLike x)
312         if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
313     {
314         import mir.ndslice.slice: toSlice;
315         return sweep(x.toSlice);
316     }
317 }
318 
319 /++
320 Params:
321     op = operation
322 +/
323 template sweep(string op)
324 {
325     /++
326     Params:
327         slice = slice
328         m = value to pass to vmap
329     +/
330     @fmamath auto sweep(Iterator, size_t N, SliceKind kind, T)(
331                Slice!(Iterator, N, kind) slice, T m)
332     {
333         import core.lifetime: move;
334         import mir.ndslice.internal: LeftOp, ImplicitlyUnqual;
335         import mir.ndslice.topology: vmap;
336 
337         return slice.move.vmap(LeftOp!(op, ImplicitlyUnqual!T)(m));
338     }
339 
340     /// ditto
341     @fmamath auto sweep(SliceLike, T)(SliceLike x, T m)
342         if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
343     {
344         import mir.ndslice.slice: toSlice;
345         return sweep(x.toSlice, m);
346     }
347 }
348 
349 /// Sweep vector
350 version(mir_stat_test)
351 @safe pure nothrow
352 unittest
353 {
354     import mir.algorithm.iteration: all;
355     import mir.math.common: approxEqual;
356     import mir.ndslice.slice: sliced;
357 
358     static double f(T)(T x) {
359         return 3.5;
360     }
361 
362     auto x = [1.0, 2, 3, 4, 5, 6].sliced;
363     assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
364     assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
365     assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
366 }
367 
368 /// Sweep dynamic array
369 version(mir_stat_test)
370 @safe pure nothrow
371 unittest
372 {
373     import mir.algorithm.iteration: all;
374     import mir.math.common: approxEqual;
375 
376     static double f(T)(T x) {
377         return 3.5;
378     }
379 
380     auto x = [1.0, 2, 3, 4, 5, 6];
381     assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
382     assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
383     assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
384 }
385 
386 /// Sweep matrix
387 version(mir_stat_test)
388 @safe pure
389 unittest
390 {
391     import mir.algorithm.iteration: all;
392     import mir.math.common: approxEqual;
393     import mir.ndslice.fuse: fuse;
394 
395     static double f(T)(T x) {
396         return 3.5;
397     }
398 
399     auto x = [
400         [1.0, 2, 3],
401         [4.0, 5, 6]
402     ].fuse;
403 
404     auto y0 = [
405         [-2.5, -1.5, -0.5],
406         [ 0.5,  1.5,  2.5]
407     ];
408 
409     auto y1 = [
410         [4.5, 5.5, 6.5],
411         [7.5, 8.5, 9.5]
412     ];
413 
414     assert(x.sweep!(f, "-").all!approxEqual(y0));
415     assert(x.sweep!"-"(3.5).all!approxEqual(y0));
416     assert(x.sweep!(f, "+").all!approxEqual(y1));
417 }
418 
419 /// Column sweep matrix
420 version(mir_stat_test)
421 @safe pure
422 unittest
423 {
424     import mir.algorithm.iteration: all, equal;
425     import mir.math.common: approxEqual;
426     import mir.ndslice.fuse: fuse;
427     import mir.ndslice.topology: alongDim, byDim, map;
428 
429     static double f(T)(T x) {
430         return 0.5 * (x[0] +x[1]);
431     }
432 
433     auto x = [
434         [20.0, 100.0, 2000.0],
435         [10.0,   5.0,    2.0]
436     ].fuse;
437 
438     auto result = [
439         [ 5.0,  47.5,  999],
440         [-5.0, -47.5, -999]
441     ].fuse;
442 
443     // Use byDim with map to sweep mean of row/column.
444     auto xSweepByDim = x.byDim!1.map!(sweep!(f, "-"));
445     auto resultByDim = result.byDim!1;
446     assert(xSweepByDim.equal!(equal!approxEqual)(resultByDim));
447 
448     auto xSweepAlongDim = x.alongDim!0.map!(sweep!(f, "-"));
449     auto resultAlongDim = result.alongDim!0;
450     assert(xSweepAlongDim.equal!(equal!approxEqual)(resultAlongDim));
451 }
452 
453 /// Can also pass arguments to sweep function
454 version(mir_stat_test)
455 @safe pure nothrow
456 unittest
457 {
458     import mir.algorithm.iteration: all;
459     import mir.math.common: approxEqual;
460     import mir.ndslice.slice: sliced;
461 
462     static double f(T)(T x, double a) {
463         return a;
464     }
465 
466     static double g(double a, T)(T x) {
467         return a;
468     }
469 
470     auto x = [1.0, 2, 3, 4, 5, 6].sliced;
471     assert(x.sweep!(a => f(a, 3.5), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
472     assert(x.sweep!(a => f(a, 3.5), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
473     assert(x.sweep!(a => g!3.5(a), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]));
474     assert(x.sweep!(a => g!3.5(a), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5]));
475 }
476 
477 /// Sweep withAsSlice
478 version(mir_stat_test)
479 @safe pure nothrow @nogc
480 unittest
481 {
482     import mir.algorithm.iteration: all;
483     import mir.math.common: approxEqual;
484     import mir.rc.array: RCArray;
485 
486     static double f(T)(T x) {
487         return 3.5;
488     }
489 
490     auto x = RCArray!double(6);
491     foreach(i, ref e; x)
492         e = i + 1;
493 
494     static immutable result1 = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5];
495     assert(x.sweep!(f, "-").all!approxEqual(result1));
496     assert(x.sweep!"-"(3.5).all!approxEqual(result1));
497     static immutable result2 = [4.5, 5.5, 6.5, 7.5, 8.5, 9.5];
498     assert(x.sweep!(f, "+").all!approxEqual(result2));
499 }
500 
501 /++
502 Scales the input.
503 
504 By default, the input is first centered using the mean of the input. A custom
505 function may also be provided using `centralTendency`. The centered input is
506 then divided by the sample standard deviation of the input. A custom function
507 may also be provided using `dispersion`.
508 
509 Overloads are also provided to scale with variables `m` and `d`, which
510 correspond to the results of `centralTendency` and `dispersion`. This function
511 is equivalent to `center` when passing `d = 1`.
512 
513 Params:
514     centralTendency = function used to center input, default is `mean`
515     dispersion = function used as divisor, default is `dispersion`
516 Returns:
517     The scaled result
518 See_also:
519     $(LREF center),
520     $(SUB2REF univariate, VarianceAlgo),
521     $(MATHREF sum, Summation),
522     $(SUB2REF univariate, mean),
523     $(SUB2REF univariate, standardDeviation),
524     $(SUB2REF univariate, median),
525     $(SUB2REF univariate, gmean),
526     $(SUB2REF univariate, hmean),
527     $(SUB2REF univariate, variance),
528     $(SUB2REF univariate, dispersion)
529 +/
530 template scale(alias centralTendency = mean!(Summation.appropriate),
531                alias dispersion = standardDeviation!(VarianceAlgo.hybrid, Summation.appropriate))
532 {
533     /++
534     Params:
535         slice = slice
536     +/
537     @fmamath auto scale(Iterator, size_t N, SliceKind kind)(
538         Slice!(Iterator, N, kind) slice)
539     {
540         import core.lifetime: move;
541 
542         auto m = centralTendency(slice.lightScope);
543         auto d = dispersion(slice.lightScope);
544         return .scale!(Iterator, N, kind, typeof(m))(slice.move, m, d);
545     }
546     
547     /// ditto
548     @fmamath auto scale(SliceLike)(SliceLike x)
549         if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
550     {
551         import mir.ndslice.slice: toSlice;
552         return scale(x.toSlice);
553     }
554 }
555 
556 /++
557 Params:
558     slice = slice
559     m = value to subtract from slice
560     d = value to divide slice by
561 +/
562 @fmamath auto scale(Iterator, size_t N, SliceKind kind, T, U)(
563            Slice!(Iterator, N, kind) slice, T m, U d)
564 {
565     import core.lifetime: move;
566     import mir.ndslice.internal: LeftOp, ImplicitlyUnqual;
567     import mir.ndslice.topology: vmap;
568     
569     assert(d > 0, "scale: cannot divide by zero");
570 
571     return slice.move.sweep!"-"(m).sweep!"/"(d);
572 }
573     
574 /// ditto
575 @fmamath auto scale(SliceLike, T, U)(SliceLike x, T m, U d)
576     if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
577 {
578     import mir.ndslice.slice: toSlice;
579     return scale(x.toSlice, m, d);
580 }
581 
582 /// Scale vector
583 version(mir_stat_test)
584 @safe pure nothrow
585 unittest
586 {
587     import mir.algorithm.iteration: all;
588     import mir.math.common: approxEqual;
589     import mir.ndslice.slice: sliced;
590     import mir.stat.descriptive.univariate: mean, gmean, hmean, median, standardDeviation;
591 
592     auto x = [1.0, 2, 3, 4, 5, 6].sliced;
593 
594     assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
595     assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
596     
597     // Can scale using different `centralTendency` functions
598     assert(x.scale!hmean.all!approxEqual([-0.774512, -0.23999, 0.294533, 0.829055, 1.363578, 1.898100]));
599     assert(x.scale!gmean.all!approxEqual([-1.065728, -0.531206, 0.003317, 0.537839, 1.072362, 1.606884]));
600     assert(x.scale!median.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
601     
602     // Can scale using different `centralTendency` and `dispersion` functions
603     assert(x.scale!(mean, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
604     assert(x.scale!(hmean, a => a.standardDeviation(true)).all!approxEqual([-0.848436, -0.262896, 0.322645, 0.908185, 1.493725, 2.079265]));
605     assert(x.scale!(gmean, a => a.standardDeviation(true)).all!approxEqual([-1.167447, -0.581907, 0.003633, 0.589173, 1.174713, 1.760253]));
606     assert(x.scale!(median, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
607 }
608 
609 /// Scale dynamic array
610 version(mir_stat_test)
611 @safe pure nothrow
612 unittest
613 {
614     import mir.algorithm.iteration: all;
615     import mir.math.common: approxEqual;
616 
617     auto x = [1.0, 2, 3, 4, 5, 6];
618     assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
619     assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
620 }
621 
622 /// Scale matrix
623 version(mir_stat_test)
624 @safe pure
625 unittest
626 {
627     import mir.algorithm.iteration: all;
628     import mir.math.common: approxEqual;
629     import mir.ndslice.fuse: fuse;
630     
631     auto x = [
632         [1.0, 2, 3], 
633         [4.0, 5, 6]
634     ].fuse;
635     
636     assert(x.scale.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
637     assert(x.scale(3.5, 1.87083).all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
638 }
639 
640 /// Column scale matrix
641 version(mir_stat_test)
642 @safe pure
643 unittest
644 {
645     import mir.algorithm.iteration: all, equal;
646     import mir.math.common: approxEqual;
647     import mir.ndslice.fuse: fuse;
648     import mir.ndslice.topology: alongDim, byDim, map;
649 
650     auto x = [
651         [20.0, 100.0, 2000.0],
652         [10.0,   5.0,    2.0]
653     ].fuse;
654 
655     auto result = [
656         [ 0.707107,  0.707107,  0.707107],
657         [-0.707107, -0.707107, -0.707107]
658     ].fuse;
659 
660     // Use byDim with map to scale by row/column.
661     auto xScaleByDim = x.byDim!1.map!scale;
662     auto resultByDim = result.byDim!1;
663     assert(xScaleByDim.equal!(equal!approxEqual)(resultByDim));
664 
665     auto xScaleAlongDim = x.alongDim!0.map!scale;
666     auto resultAlongDim = result.alongDim!0;
667     assert(xScaleAlongDim.equal!(equal!approxEqual)(resultAlongDim));
668 }
669 
670 /// Can also pass arguments to `mean` and `standardDeviation` functions used by scale
671 version(mir_stat_test)
672 @safe pure nothrow
673 unittest
674 {
675     import mir.algorithm.iteration: all;
676     import mir.math.common: approxEqual;
677     import mir.ndslice.slice: sliced;
678     import mir.stat.descriptive.univariate: mean, standardDeviation;
679 
680     //Set sum algorithm
681     auto a = [1, 1e100, 1, -1e100];
682 
683     auto x = a.sliced * 10_000;
684 
685     auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced;
686 
687     assert(x.scale!(mean!"kbn", standardDeviation!("online", "kbn")).all!approxEqual(result));
688     assert(x.scale!(mean!"kb2", standardDeviation!("online", "kb2")).all!approxEqual(result));
689     assert(x.scale!(mean!"precise", standardDeviation!("online", "precise")).all!approxEqual(result));
690 }
691 
692 // Scale withAsSlice
693 version(mir_stat_test)
694 @safe pure nothrow @nogc
695 unittest
696 {
697     import mir.algorithm.iteration: all;
698     import mir.math.common: approxEqual;
699     import mir.rc.array: RCArray;
700 
701     auto x = RCArray!double(6);
702     foreach(i, ref e; x)
703         e = i + 1;
704 
705     static immutable result = [-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306];
706     assert(x.scale.all!approxEqual(result));
707     assert(x.scale(3.5, 1.87083).all!approxEqual(result));
708 }
709 
710 /++
711 Computes the Z-score of the input.
712 
713 The Z-score is computed by first calculating the mean and standard deviation of
714 the input, by default in one pass, and then scaling the input using those values.
715 
716 Params:
717     F = controls type of output
718     varianceAlgo = algorithm for calculating variance (default: VarianceAlgo.hybrid)
719     summation = algorithm for calculating sums (default: Summation.appropriate)
720 Returns:
721     The z-score of the input
722 See_also:
723     $(LREF scale),
724     $(MATHREF stat, mean),
725     $(MATHREF stat, standardDeviation),
726     $(MATHREF stat, variance)
727 +/
728 template zscore(F, 
729                 VarianceAlgo varianceAlgo = VarianceAlgo.hybrid,
730                 Summation summation = Summation.appropriate)
731 {
732     /++
733     Params:
734         slice = slice
735         isPopulation = true if population standard deviation, false is sample (default)
736     +/
737     @fmamath auto zscore(Iterator, size_t N, SliceKind kind)(
738         Slice!(Iterator, N, kind) slice, 
739         bool isPopulation = false)
740     {
741         import core.lifetime: move;
742         import mir.math.common: sqrt;
743         import mir.math.sum: ResolveSummationType;
744         import mir.stat.descriptive.univariate: meanType, VarianceAccumulator;
745 
746         alias G = meanType!F;
747         alias T = typeof(slice);
748         auto varianceAccumulator = VarianceAccumulator!(
749             G, varianceAlgo, ResolveSummationType!(summation, T, G))(
750             slice.lightScope);
751         return scale(slice,
752                      varianceAccumulator.mean,
753                      varianceAccumulator.variance(isPopulation).sqrt);
754     }
755     
756     /// ditto
757     @fmamath auto zscore(SliceLike)(SliceLike x, bool isPopulation = false)
758         if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
759     {
760         import mir.ndslice.slice: toSlice;
761         return zscore(x.toSlice, isPopulation);
762     }
763 }
764 
765 /// ditto
766 template zscore(VarianceAlgo varianceAlgo = VarianceAlgo.hybrid,
767                 Summation summation = Summation.appropriate)
768 {
769     import mir.stat.descriptive.univariate: meanType;
770 
771     /// ditto
772     @fmamath auto zscore(Iterator, size_t N, SliceKind kind)(
773         Slice!(Iterator, N, kind) slice, 
774         bool isPopulation = false)
775     {
776         import core.lifetime: move;
777         alias F = meanType!(Slice!(Iterator, N, kind));
778         return .zscore!(F, varianceAlgo, summation)(slice.move, isPopulation);
779     }
780 
781     /// ditto
782     @fmamath auto zscore(SliceLike)(SliceLike x, bool isPopulation = false)
783         if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
784     {
785         import mir.ndslice.slice: toSlice;
786         alias F = meanType!(SliceLike);
787         return .zscore!(F, varianceAlgo, summation)(x.toSlice, isPopulation);
788     }
789 }
790 
791 /// ditto
792 template zscore(F, string varianceAlgo, string summation = "appropriate")
793 {
794     mixin("alias zscore = .zscore!(F, VarianceAlgo." ~ varianceAlgo ~ ", Summation." ~ summation ~ ");");
795 }
796 
797 /// ditto
798 template zscore(string varianceAlgo, string summation = "appropriate")
799 {
800     mixin("alias zscore = .zscore!(VarianceAlgo." ~ varianceAlgo ~ ", Summation." ~ summation ~ ");");
801 }
802 
803 /// zscore vector
804 version(mir_stat_test)
805 @safe pure nothrow
806 unittest
807 {
808     import mir.algorithm.iteration: all;
809     import mir.math.common: approxEqual;
810     import mir.ndslice.slice: sliced;
811 
812     auto x = [1.0, 2, 3, 4, 5, 6].sliced;
813 
814     assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
815     assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
816 }
817 
818 /// zscore dynamic array
819 version(mir_stat_test)
820 @safe pure nothrow
821 unittest
822 {
823     import mir.algorithm.iteration: all;
824     import mir.math.common: approxEqual;
825 
826     auto x = [1.0, 2, 3, 4, 5, 6];
827     assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]));
828     assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]));
829 }
830 
831 /// zscore matrix
832 version(mir_stat_test)
833 @safe pure
834 unittest
835 {
836     import mir.algorithm.iteration: all;
837     import mir.math.common: approxEqual;
838     import mir.ndslice.fuse: fuse;
839     
840     auto x = [
841         [1.0, 2, 3], 
842         [4.0, 5, 6]
843     ].fuse;
844     
845     assert(x.zscore.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]]));
846     assert(x.zscore(true).all!approxEqual([[-1.46385, -0.87831, -0.29277], [0.29277, 0.87831, 1.46385]]));
847 }
848 
849 /// Column zscore matrix
850 version(mir_stat_test)
851 @safe pure
852 unittest
853 {
854     import mir.algorithm.iteration: all, equal;
855     import mir.math.common: approxEqual;
856     import mir.ndslice.fuse: fuse;
857     import mir.ndslice.topology: alongDim, byDim, map;
858 
859     auto x = [
860         [20.0, 100.0, 2000.0],
861         [10.0,   5.0,    2.0]
862     ].fuse;
863 
864     auto result = [
865         [ 0.707107,  0.707107,  0.707107],
866         [-0.707107, -0.707107, -0.707107]
867     ].fuse;
868 
869     // Use byDim with map to scale by row/column.
870     auto xZScoreByDim = x.byDim!1.map!zscore;
871     auto resultByDim = result.byDim!1;
872     assert(xZScoreByDim.equal!(equal!approxEqual)(resultByDim));
873 
874     auto xZScoreAlongDim = x.alongDim!0.map!zscore;
875     auto resultAlongDim = result.alongDim!0;
876     assert(xZScoreAlongDim.equal!(equal!approxEqual)(resultAlongDim));
877 }
878 
879 /// Can control how `mean` and `standardDeviation` are calculated and output type
880 version(mir_stat_test)
881 @safe pure nothrow
882 unittest
883 {
884     import mir.algorithm.iteration: all;
885     import mir.math.common: approxEqual;
886     import mir.ndslice.slice: sliced;
887     import mir.ndslice.topology: repeat;
888 
889     //Set sum algorithm or output type
890     auto a = [1, 1e100, 1, -1e100];
891 
892     auto x = a.sliced * 10_000;
893 
894     auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced;
895 
896     assert(x.zscore!("online", "kbn").all!approxEqual(result));
897     assert(x.zscore!("online", "kb2").all!approxEqual(result));
898     assert(x.zscore!("online", "precise").all!approxEqual(result));
899     assert(x.zscore!(double, "online", "precise").all!approxEqual(result));
900 
901     auto y = [uint.max, uint.max / 2, uint.max / 3].sliced;
902     assert(y.zscore!ulong.all!approxEqual([1.120897, -0.320256, -0.800641]));
903 }
904 
905 // zscore withAsSlice
906 version(mir_stat_test)
907 @safe pure nothrow @nogc
908 unittest
909 {
910     import mir.algorithm.iteration: all;
911     import mir.math.common: approxEqual;
912     import mir.rc.array: RCArray;
913 
914     auto x = RCArray!double(6);
915     foreach(i, ref e; x)
916         e = i + 1;
917 
918     static immutable result1 = [-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306];
919     assert(x.zscore.all!approxEqual(result1));
920     static immutable result2 = [-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385];
921     assert(x.zscore(true).all!approxEqual(result2));
922 }
923 
924 /++
925 Scales input using robust statistics.
926 
927 This function centers the input using the `median` and then `scale`s the data
928 according to the quantile range defined by (`low_quartile`, 1 - `low_quartile`).
929 By default, it uses the interquartile range, whereby `low_quartile` equals 0.25.
930 
931 Params:
932     F = controls type of output
933     quantileAlgo = algorithm for calculating quantile (default: `QuantileAlgo.type7`)
934     allowModifySlice = controls whether the input is modified in place, default is false
935 Returns:
936     The robust scaled input
937 See_also:
938     $(LREF scale),
939     $(SUB2REF univariate, median),
940     $(SUB2REF univariate, quantile),
941     $(SUB2REF univariate, interquartileRange)
942 +/
943 template robustScale(F,
944                      QuantileAlgo quantileAlgo = QuantileAlgo.type7, 
945                      bool allowModifySlice = false)
946 {
947     /++
948     Params:
949         slice = slice
950         low_quartile = lower end of quartile range
951     +/
952     @fmamath auto robustScale(Iterator, size_t N, SliceKind kind, T)(
953         Slice!(Iterator, N, kind) slice, 
954         T low_quartile = 0.25)
955     {
956         assert(low_quartile > 0.0, "robustScale: low_quartile must be greater than zero");
957         assert(low_quartile < 0.5, "robustScale: low_quartile must be less than 0.5");
958 
959         import mir.ndslice.topology: flattened;
960         import mir.stat.descriptive.univariate: median, meanType, quantile, quantileType;
961 
962         static if (!allowModifySlice) {
963             import mir.ndslice.allocation: rcslice;
964             import mir.ndslice.topology: as;
965             import std.traits: Unqual;
966 
967             auto view = slice.lightScope;
968             auto val = view.as!(Unqual!(slice.DeepElement)).rcslice;
969             auto temp = val.lightScope.flattened;
970         } else {
971             auto temp = slice.flattened;
972         }
973 
974         quantileType!(F, quantileAlgo) low_quartile_value = temp.quantile!(F, quantileAlgo, allowModifySlice, false)(low_quartile);
975         meanType!F median_value = temp.median!(F, allowModifySlice);
976         quantileType!(F, quantileAlgo) high_quartile_value = temp.quantile!(F, quantileAlgo, allowModifySlice, false)(cast(F) 1 - low_quartile);
977 
978         static if (allowModifySlice) {
979             return scale(temp, median_value, cast(meanType!F) (high_quartile_value - low_quartile_value));
980         } else {
981             return scale(slice, median_value, cast(meanType!F) (high_quartile_value - low_quartile_value));
982         }
983     }
984 
985     /// ditto
986     @fmamath auto robustScale(SliceLike)(SliceLike x, F low_quartile = cast(F) 0.25)
987         if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
988     {
989         import mir.ndslice.slice: toSlice;
990         return robustScale(x.toSlice, low_quartile);
991     }
992 }
993 
994 /++
995 Params:
996     quantileAlgo = algorithm for calculating quantile (default: `QuantileAlgo.type7`)
997     allowModifySlice = controls whether the input is modified in place, default is false
998 +/
999 template robustScale(QuantileAlgo quantileAlgo = QuantileAlgo.type7, 
1000                      bool allowModifySlice = false)
1001 {
1002     import mir.stat.descriptive.univariate: meanType;
1003 
1004     /++
1005     Params:
1006         slice = slice
1007         low_quartile = lower end of quartile range
1008     +/
1009     @fmamath auto robustScale(Iterator, size_t N, SliceKind kind)(
1010         Slice!(Iterator, N, kind) slice, 
1011         double low_quartile = 0.25)
1012     {
1013         import core.lifetime: move;
1014         alias F = meanType!(Slice!(Iterator, N, kind));
1015         return .robustScale!(F, quantileAlgo, allowModifySlice)(slice.move, cast(F) low_quartile);
1016     }
1017 
1018     /// ditto
1019     @fmamath auto robustScale(SliceLike)(SliceLike x, double low_quartile = 0.25)
1020         if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike)
1021     {
1022         import mir.ndslice.slice: toSlice;
1023         alias F = meanType!(SliceLike);
1024         return .robustScale!(F, quantileAlgo, allowModifySlice)(x.toSlice, cast(F) low_quartile);
1025     }
1026 }
1027 
1028 /// ditto
1029 template robustScale(F, string quantileAlgo, bool allowModifySlice = false)
1030 {
1031     mixin("alias robustScale = .robustScale!(F, QuantileAlgo." ~ quantileAlgo ~ ", allowModifySlice);");
1032 }
1033 
1034 /// ditto
1035 template robustScale(string quantileAlgo, bool allowModifySlice = false)
1036 {
1037     mixin("alias robustScale = .robustScale!(QuantileAlgo." ~ quantileAlgo ~ ", allowModifySlice);");
1038 }
1039 
1040 /// ditto
1041 template robustScale(bool allowModifySlice)
1042 {
1043     mixin("alias robustScale = .robustScale!(QuantileAlgo.type7, allowModifySlice);");
1044 }
1045 
1046 /// robustScale vector
1047 version(mir_stat_test)
1048 @safe pure nothrow
1049 unittest
1050 {
1051     import mir.algorithm.iteration: all, findIndex;
1052     import mir.math.common: approxEqual;
1053     import mir.ndslice.slice: sliced;
1054     
1055     static immutable input = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100];
1056     auto x = input.dup.sliced;
1057     auto y = x.robustScale;
1058 
1059     assert(y.all!approxEqual([14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000]));
1060     assert(x.robustScale(0.15).all!approxEqual([8.02752, 0.321101, -0.0458716, 0.0458716, 0.229358, -0.0458716, 0.321101, -0.321101, -0.87156, -10.3211]));
1061 
1062     // When allowModifySlice = true, this modifies both the original input and
1063     // the order of the output
1064     auto yCopy = y.idup;
1065     auto z = x.robustScale!true;
1066     size_t j;
1067     foreach(i, ref e; input) {
1068         j = x.findIndex!(a => a == e);
1069         assert(z[j].approxEqual(yCopy[i]));
1070     }
1071 }
1072 
1073 /// robustScale dynamic array
1074 version(mir_stat_test)
1075 @safe pure nothrow
1076 unittest
1077 {
1078     import mir.algorithm.iteration: all;
1079     import mir.math.common: approxEqual;
1080 
1081     auto x = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100];
1082     assert(x.robustScale.all!approxEqual([14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000]));
1083 }
1084 
1085 /// robustScale matrix
1086 version(mir_stat_test)
1087 @safe pure
1088 unittest
1089 {
1090     import mir.algorithm.iteration: all;
1091     import mir.math.common: approxEqual;
1092     import mir.ndslice.fuse: fuse;
1093 
1094     auto x = [
1095         [100.0, 16, 12, 13,   15], 
1096         [ 12.0, 16,  9,  3, -100]
1097     ].fuse;
1098 
1099     assert(x.robustScale.all!approxEqual([[14.583333, 0.583333, -0.083333, 0.083333, 0.416667], [-0.083333, 0.583333, -0.583333, -1.583333, -18.750000]]));
1100 }
1101 
1102 /// Column robustScale matrix
1103 version(mir_stat_test)
1104 @safe pure
1105 unittest
1106 {
1107     import mir.algorithm.iteration: all, equal;
1108     import mir.math.common: approxEqual;
1109     import mir.ndslice.fuse: fuse;
1110     import mir.ndslice.topology: alongDim, byDim, map;
1111 
1112     auto x = [
1113         [100.0, 16, 12, 13,   15], 
1114         [ 12.0, 16,  9,  3, -100]
1115     ].fuse;
1116 
1117     auto result = [
1118         [28.333333, 0.333333, -1.0, -0.666667,  0.0], 
1119         [ 0.333333, 0.777778,  0.0, -0.666667, -12.111111]
1120     ].fuse;
1121 
1122     // Use byDim with map to scale by row/column.
1123     auto xRobustScaleByDim = x.byDim!0.map!robustScale;
1124     auto resultByDim = result.byDim!0;
1125     assert(xRobustScaleByDim.equal!(equal!approxEqual)(resultByDim));
1126 
1127     auto xRobustScaleAlongDim = x.alongDim!1.map!robustScale;
1128     auto resultAlongDim = result.alongDim!1;
1129     assert(xRobustScaleAlongDim.equal!(equal!approxEqual)(resultAlongDim));
1130 }
1131 
1132 /// Can control `QuantileAlgo` and output type
1133 version(mir_stat_test)
1134 @safe pure nothrow
1135 unittest
1136 {
1137     import mir.algorithm.iteration: all;
1138     import mir.math.common: approxEqual;
1139     import mir.ndslice.slice: sliced;
1140     import mir.ndslice.topology: repeat;
1141 
1142     //Set `QuantileAlgo` algorithm or output type
1143     auto x = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100].sliced;
1144 
1145     assert(x.robustScale!("type9").all!approxEqual([11.864407, 0.474576, -0.0677966, 0.0677966, 0.338983, -0.0677966, 0.474576, -0.474576, -1.288136, -15.254237]));
1146     assert(x.robustScale!("type1").all!approxEqual([12.500000, 0.500000, -0.0714286, 0.0714286, 0.357143, -0.0714286, 0.500000, -0.500000, -1.357143, -16.071429]));
1147     assert(x.robustScale!(float, "type6").all!approxEqual([10.294118f, 0.411765f, -0.0588235f, 0.0588235f, 0.294118f, -0.0588235f, 0.411765f, -0.411765f, -1.117647f, -13.235294f]));
1148 
1149     auto y = [uint.max, uint.max / 2, uint.max / 3].sliced;
1150     assert(y.robustScale!"type1".all!approxEqual([0.75, 0, -0.25]));
1151 
1152     auto z = [ulong.max, ulong.max / 2, ulong.max / 3].sliced;
1153     assert(z.robustScale!(ulong, "type1").all!approxEqual([0.75, 0, -0.25]));
1154 }
1155 
1156 // robustScale withAsSlice
1157 version(mir_stat_test)
1158 @safe pure nothrow @nogc
1159 unittest
1160 {
1161     import mir.algorithm.iteration: all;
1162     import mir.math.common: approxEqual;
1163     import mir.rc.array: RCArray;
1164 
1165     static immutable value = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100];
1166 
1167     auto x = RCArray!double(10);
1168     foreach(i, ref e; x)
1169         e = value[i];
1170 
1171     static immutable result = [14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000];
1172     assert(x.robustScale.all!approxEqual(result));
1173 }
The OpenD Programming Language