1 /++ 2 This module contains algorithms for transforming data that are useful in 3 statistical applications. 4 5 $(SCRIPT inhibitQuickIndex = 1;) 6 $(DIVC quickindex, 7 $(BOOKTABLE , 8 $(TR $(TH Function Name) $(TH Description) 9 ) 10 $(TR $(TD $(LREF center)) 11 $(TD Subtracts the mean (or using some other function) from each element 12 of a slice. 13 )) 14 $(TR $(TD $(LREF robustScale)) 15 $(TD Subtracts the median and divides by the difference between a lower 16 and upper quantile from each element of a slice. 17 )) 18 $(TR $(TD $(LREF scale)) 19 $(TD Subtracts the mean (or using some other function) and divides by 20 the standard deviation (or using some other function) from each element 21 of a slice. 22 )) 23 $(TR $(TD $(LREF sweep)) 24 $(TD Applies a function and an operation to each element of a slice. 25 )) 26 $(TR $(TD $(LREF zscore)) 27 $(TD Subtracts the mean and divides by the standard deviation from each 28 element of a slice. 29 )) 30 )) 31 32 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 33 34 The $(LREF center) function is borrowed from $(HTTP mir-algorithm.$(MIR_SITE)/mir_math_stat.html, mir.math.stat). 35 36 Authors: John Michael Hall, Ilya Yaroshenko 37 38 Copyright: 2022-3 Mir Stat Authors. 39 40 Macros: 41 SUBREF = $(REF_ALTTEXT $(TT $2), $2, mir, stat, $1)$(NBSP) 42 SUB2REF = $(REF_ALTTEXT $(TT $2), $2, mir, stat, descriptive, $1)$(NBSP) 43 MATHREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, math, $1)$(NBSP) 44 T2=$(TR $(TDNW $(LREF $1)) $(TD $+)) 45 T4=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3) $(TD $4)) 46 47 +/ 48 49 module mir.stat.transform; 50 51 import mir.math.common: fmamath; 52 import mir.math.sum: Summation; 53 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 54 import mir.stat.descriptive.univariate: mean, QuantileAlgo, standardDeviation, VarianceAlgo; 55 56 /++ 57 Centers `slice`, which must be a finite iterable. 58 59 By default, `slice` is centered by the mean. A custom function may also be 60 provided using `centralTendency`. 61 62 Returns: 63 The elements in the slice with the average subtracted from them. 64 65 See_also: 66 $(SUB2REF univariate, mean) 67 +/ 68 template center(alias centralTendency = mean!(Summation.appropriate)) 69 { 70 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 71 /++ 72 Params: 73 slice = slice 74 +/ 75 auto center(Iterator, size_t N, SliceKind kind)( 76 Slice!(Iterator, N, kind) slice) 77 { 78 import core.lifetime: move; 79 import mir.ndslice.internal: LeftOp, ImplicitlyUnqual; 80 import mir.ndslice.topology: vmap; 81 82 auto m = centralTendency(slice.lightScope); 83 alias T = typeof(m); 84 return slice.move.vmap(LeftOp!("-", ImplicitlyUnqual!T)(m)); 85 } 86 87 /// ditto 88 auto center(T)(T x) 89 if (isConvertibleToSlice!T && !isSlice!T) 90 { 91 import mir.ndslice.slice: toSlice; 92 return center(x.toSlice); 93 } 94 } 95 96 /// Center vector 97 version(mir_stat_test) 98 @safe pure nothrow 99 unittest 100 { 101 import mir.algorithm.iteration: all; 102 import mir.math.common: approxEqual; 103 import mir.ndslice.slice: sliced; 104 import mir.stat.descriptive.univariate: gmean, hmean, median; 105 106 auto x = [1.0, 2, 3, 4, 5, 6].sliced; 107 assert(x.center.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 108 109 // Can center using different functions 110 assert(x.center!hmean.all!approxEqual([-1.44898, -0.44898, 0.55102, 1.55102, 2.55102, 3.55102])); 111 assert(x.center!gmean.all!approxEqual([-1.99379516, -0.99379516, 0.00620483, 1.00620483, 2.00620483, 3.00620483])); 112 assert(x.center!median.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 113 114 // center operates lazily, if original slice is changed, then 115 auto y = x.center; 116 assert(y.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 117 x[0]++; 118 assert(y.all!approxEqual([-1.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 119 } 120 121 /// Example of lazy behavior of center 122 version(mir_stat_test) 123 @safe pure nothrow 124 unittest 125 { 126 import mir.algorithm.iteration: all; 127 import mir.math.common: approxEqual; 128 import mir.ndslice.allocation: slice; 129 import mir.ndslice.slice: sliced; 130 131 auto x = [1.0, 2, 3, 4, 5, 6].sliced; 132 auto y = x.center; 133 auto z = x.center.slice; 134 assert(y.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 135 x[0]++; 136 // y changes, while z does not 137 assert(y.all!approxEqual([-1.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 138 assert(z.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 139 } 140 141 /// Center dynamic array 142 version(mir_stat_test) 143 @safe pure nothrow 144 unittest 145 { 146 import mir.algorithm.iteration: all; 147 import mir.math.common: approxEqual; 148 149 auto x = [1.0, 2, 3, 4, 5, 6]; 150 assert(x.center.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 151 } 152 153 /// Center matrix 154 version(mir_stat_test) 155 @safe pure 156 unittest 157 { 158 import mir.algorithm.iteration: all; 159 import mir.math.common: approxEqual; 160 import mir.ndslice: fuse; 161 162 auto x = [ 163 [0.0, 1, 2], 164 [3.0, 4, 5] 165 ].fuse; 166 167 auto y = [ 168 [-2.5, -1.5, -0.5], 169 [ 0.5, 1.5, 2.5] 170 ].fuse; 171 172 assert(x.center.all!approxEqual(y)); 173 } 174 175 /// Column center matrix 176 version(mir_stat_test) 177 @safe pure 178 unittest 179 { 180 import mir.algorithm.iteration: all, equal; 181 import mir.math.common: approxEqual; 182 import mir.ndslice: fuse; 183 import mir.ndslice.topology: alongDim, byDim, map; 184 185 auto x = [ 186 [20.0, 100.0, 2000.0], 187 [10.0, 5.0, 2.0] 188 ].fuse; 189 190 auto result = [ 191 [ 5.0, 47.5, 999], 192 [-5.0, -47.5, -999] 193 ].fuse; 194 195 // Use byDim with map to compute average of row/column. 196 auto xCenterByDim = x.byDim!1.map!center; 197 auto resultByDim = result.byDim!1; 198 assert(xCenterByDim.equal!(equal!approxEqual)(resultByDim)); 199 200 auto xCenterAlongDim = x.alongDim!0.map!center; 201 auto resultAlongDim = result.alongDim!0; 202 assert(xCenterByDim.equal!(equal!approxEqual)(resultAlongDim)); 203 } 204 205 /// Can also pass arguments to average function used by center 206 version(mir_stat_test) 207 pure @safe nothrow 208 unittest 209 { 210 import mir.ndslice.slice: sliced; 211 import mir.stat.descriptive.univariate: mean; 212 213 //Set sum algorithm or output type 214 auto a = [1, 1e100, 1, -1e100]; 215 216 auto x = a.sliced * 10_000; 217 218 //Due to Floating Point precision, subtracting the mean from the second 219 //and fourth numbers in `x` does not change the value of the result 220 auto result = [5000, 1e104, 5000, -1e104].sliced; 221 222 assert(x.center!(mean!"kbn") == result); 223 assert(x.center!(mean!"kb2") == result); 224 assert(x.center!(mean!"precise") == result); 225 } 226 227 /++ 228 Passing a centered input to `variance` or `standardDeviation` with the 229 `assumeZeroMean` algorithm is equivalent to calculating `variance` or 230 `standardDeviation` on the original input. 231 +/ 232 version(mir_stat_test) 233 @safe pure nothrow 234 unittest 235 { 236 import mir.algorithm.iteration: all; 237 import mir.math.common: approxEqual; 238 import mir.ndslice.slice: sliced; 239 import mir.stat.descriptive.univariate: standardDeviation, variance; 240 241 auto x = [1.0, 2, 3, 4, 5, 6].sliced; 242 assert(x.center.variance!"assumeZeroMean".approxEqual(x.variance)); 243 assert(x.center.standardDeviation!"assumeZeroMean".approxEqual(x.standardDeviation)); 244 } 245 246 // dynamic array test 247 version(mir_stat_test) 248 @safe pure nothrow 249 unittest 250 { 251 import mir.algorithm.iteration: all; 252 import mir.math.common: approxEqual; 253 254 double[] x = [1.0, 2, 3, 4, 5, 6]; 255 256 assert(x.center.all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 257 } 258 259 // withAsSlice test 260 version(mir_stat_test) 261 @safe pure nothrow @nogc 262 unittest 263 { 264 import mir.algorithm.iteration: all; 265 import mir.math.common: approxEqual; 266 import mir.rc.array: RCArray; 267 268 static immutable a = [1.0, 2, 3, 4, 5, 6]; 269 static immutable result = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]; 270 271 auto x = RCArray!double(6); 272 foreach(i, ref e; x) 273 e = a[i]; 274 275 assert(x.center.all!approxEqual(result)); 276 } 277 278 /++ 279 For each `e` of the input, applies `e op m` where `m` is the result of `fun` and 280 `op` is an operation, such as `"+"`, `"-"`, `"*"`, or `"/"`. For instance, if 281 `op = "-"`, then this function computes `e - m` for each `e` of the input and 282 where `m` is the result of applying `fun` to the input. 283 Overloads are provided to directly provide `m` to the function, rather than 284 calculate it using `fun`. 285 286 Params: 287 fun = function used to sweep 288 op = operation 289 Returns: 290 The input 291 See_also: 292 $(LREF center), 293 $(LREF scale) 294 +/ 295 template sweep(alias fun, string op) 296 { 297 /++ 298 Params: 299 slice = slice 300 +/ 301 @fmamath auto sweep(Iterator, size_t N, SliceKind kind)( 302 Slice!(Iterator, N, kind) slice) 303 { 304 import core.lifetime: move; 305 306 auto m = fun(slice.lightScope); 307 return .sweep!op(slice.move, m); 308 } 309 310 /// ditto 311 @fmamath auto sweep(SliceLike)(SliceLike x) 312 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 313 { 314 import mir.ndslice.slice: toSlice; 315 return sweep(x.toSlice); 316 } 317 } 318 319 /++ 320 Params: 321 op = operation 322 +/ 323 template sweep(string op) 324 { 325 /++ 326 Params: 327 slice = slice 328 m = value to pass to vmap 329 +/ 330 @fmamath auto sweep(Iterator, size_t N, SliceKind kind, T)( 331 Slice!(Iterator, N, kind) slice, T m) 332 { 333 import core.lifetime: move; 334 import mir.ndslice.internal: LeftOp, ImplicitlyUnqual; 335 import mir.ndslice.topology: vmap; 336 337 return slice.move.vmap(LeftOp!(op, ImplicitlyUnqual!T)(m)); 338 } 339 340 /// ditto 341 @fmamath auto sweep(SliceLike, T)(SliceLike x, T m) 342 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 343 { 344 import mir.ndslice.slice: toSlice; 345 return sweep(x.toSlice, m); 346 } 347 } 348 349 /// Sweep vector 350 version(mir_stat_test) 351 @safe pure nothrow 352 unittest 353 { 354 import mir.algorithm.iteration: all; 355 import mir.math.common: approxEqual; 356 import mir.ndslice.slice: sliced; 357 358 static double f(T)(T x) { 359 return 3.5; 360 } 361 362 auto x = [1.0, 2, 3, 4, 5, 6].sliced; 363 assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 364 assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 365 assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5])); 366 } 367 368 /// Sweep dynamic array 369 version(mir_stat_test) 370 @safe pure nothrow 371 unittest 372 { 373 import mir.algorithm.iteration: all; 374 import mir.math.common: approxEqual; 375 376 static double f(T)(T x) { 377 return 3.5; 378 } 379 380 auto x = [1.0, 2, 3, 4, 5, 6]; 381 assert(x.sweep!(f, "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 382 assert(x.sweep!"-"(3.5).all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 383 assert(x.sweep!(f, "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5])); 384 } 385 386 /// Sweep matrix 387 version(mir_stat_test) 388 @safe pure 389 unittest 390 { 391 import mir.algorithm.iteration: all; 392 import mir.math.common: approxEqual; 393 import mir.ndslice.fuse: fuse; 394 395 static double f(T)(T x) { 396 return 3.5; 397 } 398 399 auto x = [ 400 [1.0, 2, 3], 401 [4.0, 5, 6] 402 ].fuse; 403 404 auto y0 = [ 405 [-2.5, -1.5, -0.5], 406 [ 0.5, 1.5, 2.5] 407 ]; 408 409 auto y1 = [ 410 [4.5, 5.5, 6.5], 411 [7.5, 8.5, 9.5] 412 ]; 413 414 assert(x.sweep!(f, "-").all!approxEqual(y0)); 415 assert(x.sweep!"-"(3.5).all!approxEqual(y0)); 416 assert(x.sweep!(f, "+").all!approxEqual(y1)); 417 } 418 419 /// Column sweep matrix 420 version(mir_stat_test) 421 @safe pure 422 unittest 423 { 424 import mir.algorithm.iteration: all, equal; 425 import mir.math.common: approxEqual; 426 import mir.ndslice.fuse: fuse; 427 import mir.ndslice.topology: alongDim, byDim, map; 428 429 static double f(T)(T x) { 430 return 0.5 * (x[0] +x[1]); 431 } 432 433 auto x = [ 434 [20.0, 100.0, 2000.0], 435 [10.0, 5.0, 2.0] 436 ].fuse; 437 438 auto result = [ 439 [ 5.0, 47.5, 999], 440 [-5.0, -47.5, -999] 441 ].fuse; 442 443 // Use byDim with map to sweep mean of row/column. 444 auto xSweepByDim = x.byDim!1.map!(sweep!(f, "-")); 445 auto resultByDim = result.byDim!1; 446 assert(xSweepByDim.equal!(equal!approxEqual)(resultByDim)); 447 448 auto xSweepAlongDim = x.alongDim!0.map!(sweep!(f, "-")); 449 auto resultAlongDim = result.alongDim!0; 450 assert(xSweepAlongDim.equal!(equal!approxEqual)(resultAlongDim)); 451 } 452 453 /// Can also pass arguments to sweep function 454 version(mir_stat_test) 455 @safe pure nothrow 456 unittest 457 { 458 import mir.algorithm.iteration: all; 459 import mir.math.common: approxEqual; 460 import mir.ndslice.slice: sliced; 461 462 static double f(T)(T x, double a) { 463 return a; 464 } 465 466 static double g(double a, T)(T x) { 467 return a; 468 } 469 470 auto x = [1.0, 2, 3, 4, 5, 6].sliced; 471 assert(x.sweep!(a => f(a, 3.5), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 472 assert(x.sweep!(a => f(a, 3.5), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5])); 473 assert(x.sweep!(a => g!3.5(a), "-").all!approxEqual([-2.5, -1.5, -0.5, 0.5, 1.5, 2.5])); 474 assert(x.sweep!(a => g!3.5(a), "+").all!approxEqual([4.5, 5.5, 6.5, 7.5, 8.5, 9.5])); 475 } 476 477 /// Sweep withAsSlice 478 version(mir_stat_test) 479 @safe pure nothrow @nogc 480 unittest 481 { 482 import mir.algorithm.iteration: all; 483 import mir.math.common: approxEqual; 484 import mir.rc.array: RCArray; 485 486 static double f(T)(T x) { 487 return 3.5; 488 } 489 490 auto x = RCArray!double(6); 491 foreach(i, ref e; x) 492 e = i + 1; 493 494 static immutable result1 = [-2.5, -1.5, -0.5, 0.5, 1.5, 2.5]; 495 assert(x.sweep!(f, "-").all!approxEqual(result1)); 496 assert(x.sweep!"-"(3.5).all!approxEqual(result1)); 497 static immutable result2 = [4.5, 5.5, 6.5, 7.5, 8.5, 9.5]; 498 assert(x.sweep!(f, "+").all!approxEqual(result2)); 499 } 500 501 /++ 502 Scales the input. 503 504 By default, the input is first centered using the mean of the input. A custom 505 function may also be provided using `centralTendency`. The centered input is 506 then divided by the sample standard deviation of the input. A custom function 507 may also be provided using `dispersion`. 508 509 Overloads are also provided to scale with variables `m` and `d`, which 510 correspond to the results of `centralTendency` and `dispersion`. This function 511 is equivalent to `center` when passing `d = 1`. 512 513 Params: 514 centralTendency = function used to center input, default is `mean` 515 dispersion = function used as divisor, default is `dispersion` 516 Returns: 517 The scaled result 518 See_also: 519 $(LREF center), 520 $(SUB2REF univariate, VarianceAlgo), 521 $(MATHREF sum, Summation), 522 $(SUB2REF univariate, mean), 523 $(SUB2REF univariate, standardDeviation), 524 $(SUB2REF univariate, median), 525 $(SUB2REF univariate, gmean), 526 $(SUB2REF univariate, hmean), 527 $(SUB2REF univariate, variance), 528 $(SUB2REF univariate, dispersion) 529 +/ 530 template scale(alias centralTendency = mean!(Summation.appropriate), 531 alias dispersion = standardDeviation!(VarianceAlgo.hybrid, Summation.appropriate)) 532 { 533 /++ 534 Params: 535 slice = slice 536 +/ 537 @fmamath auto scale(Iterator, size_t N, SliceKind kind)( 538 Slice!(Iterator, N, kind) slice) 539 { 540 import core.lifetime: move; 541 542 auto m = centralTendency(slice.lightScope); 543 auto d = dispersion(slice.lightScope); 544 return .scale!(Iterator, N, kind, typeof(m))(slice.move, m, d); 545 } 546 547 /// ditto 548 @fmamath auto scale(SliceLike)(SliceLike x) 549 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 550 { 551 import mir.ndslice.slice: toSlice; 552 return scale(x.toSlice); 553 } 554 } 555 556 /++ 557 Params: 558 slice = slice 559 m = value to subtract from slice 560 d = value to divide slice by 561 +/ 562 @fmamath auto scale(Iterator, size_t N, SliceKind kind, T, U)( 563 Slice!(Iterator, N, kind) slice, T m, U d) 564 { 565 import core.lifetime: move; 566 import mir.ndslice.internal: LeftOp, ImplicitlyUnqual; 567 import mir.ndslice.topology: vmap; 568 569 assert(d > 0, "scale: cannot divide by zero"); 570 571 return slice.move.sweep!"-"(m).sweep!"/"(d); 572 } 573 574 /// ditto 575 @fmamath auto scale(SliceLike, T, U)(SliceLike x, T m, U d) 576 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 577 { 578 import mir.ndslice.slice: toSlice; 579 return scale(x.toSlice, m, d); 580 } 581 582 /// Scale vector 583 version(mir_stat_test) 584 @safe pure nothrow 585 unittest 586 { 587 import mir.algorithm.iteration: all; 588 import mir.math.common: approxEqual; 589 import mir.ndslice.slice: sliced; 590 import mir.stat.descriptive.univariate: mean, gmean, hmean, median, standardDeviation; 591 592 auto x = [1.0, 2, 3, 4, 5, 6].sliced; 593 594 assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306])); 595 assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306])); 596 597 // Can scale using different `centralTendency` functions 598 assert(x.scale!hmean.all!approxEqual([-0.774512, -0.23999, 0.294533, 0.829055, 1.363578, 1.898100])); 599 assert(x.scale!gmean.all!approxEqual([-1.065728, -0.531206, 0.003317, 0.537839, 1.072362, 1.606884])); 600 assert(x.scale!median.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306])); 601 602 // Can scale using different `centralTendency` and `dispersion` functions 603 assert(x.scale!(mean, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385])); 604 assert(x.scale!(hmean, a => a.standardDeviation(true)).all!approxEqual([-0.848436, -0.262896, 0.322645, 0.908185, 1.493725, 2.079265])); 605 assert(x.scale!(gmean, a => a.standardDeviation(true)).all!approxEqual([-1.167447, -0.581907, 0.003633, 0.589173, 1.174713, 1.760253])); 606 assert(x.scale!(median, a => a.standardDeviation(true)).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385])); 607 } 608 609 /// Scale dynamic array 610 version(mir_stat_test) 611 @safe pure nothrow 612 unittest 613 { 614 import mir.algorithm.iteration: all; 615 import mir.math.common: approxEqual; 616 617 auto x = [1.0, 2, 3, 4, 5, 6]; 618 assert(x.scale.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306])); 619 assert(x.scale(3.5, 1.87083).all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306])); 620 } 621 622 /// Scale matrix 623 version(mir_stat_test) 624 @safe pure 625 unittest 626 { 627 import mir.algorithm.iteration: all; 628 import mir.math.common: approxEqual; 629 import mir.ndslice.fuse: fuse; 630 631 auto x = [ 632 [1.0, 2, 3], 633 [4.0, 5, 6] 634 ].fuse; 635 636 assert(x.scale.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]])); 637 assert(x.scale(3.5, 1.87083).all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]])); 638 } 639 640 /// Column scale matrix 641 version(mir_stat_test) 642 @safe pure 643 unittest 644 { 645 import mir.algorithm.iteration: all, equal; 646 import mir.math.common: approxEqual; 647 import mir.ndslice.fuse: fuse; 648 import mir.ndslice.topology: alongDim, byDim, map; 649 650 auto x = [ 651 [20.0, 100.0, 2000.0], 652 [10.0, 5.0, 2.0] 653 ].fuse; 654 655 auto result = [ 656 [ 0.707107, 0.707107, 0.707107], 657 [-0.707107, -0.707107, -0.707107] 658 ].fuse; 659 660 // Use byDim with map to scale by row/column. 661 auto xScaleByDim = x.byDim!1.map!scale; 662 auto resultByDim = result.byDim!1; 663 assert(xScaleByDim.equal!(equal!approxEqual)(resultByDim)); 664 665 auto xScaleAlongDim = x.alongDim!0.map!scale; 666 auto resultAlongDim = result.alongDim!0; 667 assert(xScaleAlongDim.equal!(equal!approxEqual)(resultAlongDim)); 668 } 669 670 /// Can also pass arguments to `mean` and `standardDeviation` functions used by scale 671 version(mir_stat_test) 672 @safe pure nothrow 673 unittest 674 { 675 import mir.algorithm.iteration: all; 676 import mir.math.common: approxEqual; 677 import mir.ndslice.slice: sliced; 678 import mir.stat.descriptive.univariate: mean, standardDeviation; 679 680 //Set sum algorithm 681 auto a = [1, 1e100, 1, -1e100]; 682 683 auto x = a.sliced * 10_000; 684 685 auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced; 686 687 assert(x.scale!(mean!"kbn", standardDeviation!("online", "kbn")).all!approxEqual(result)); 688 assert(x.scale!(mean!"kb2", standardDeviation!("online", "kb2")).all!approxEqual(result)); 689 assert(x.scale!(mean!"precise", standardDeviation!("online", "precise")).all!approxEqual(result)); 690 } 691 692 // Scale withAsSlice 693 version(mir_stat_test) 694 @safe pure nothrow @nogc 695 unittest 696 { 697 import mir.algorithm.iteration: all; 698 import mir.math.common: approxEqual; 699 import mir.rc.array: RCArray; 700 701 auto x = RCArray!double(6); 702 foreach(i, ref e; x) 703 e = i + 1; 704 705 static immutable result = [-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]; 706 assert(x.scale.all!approxEqual(result)); 707 assert(x.scale(3.5, 1.87083).all!approxEqual(result)); 708 } 709 710 /++ 711 Computes the Z-score of the input. 712 713 The Z-score is computed by first calculating the mean and standard deviation of 714 the input, by default in one pass, and then scaling the input using those values. 715 716 Params: 717 F = controls type of output 718 varianceAlgo = algorithm for calculating variance (default: VarianceAlgo.hybrid) 719 summation = algorithm for calculating sums (default: Summation.appropriate) 720 Returns: 721 The z-score of the input 722 See_also: 723 $(LREF scale), 724 $(MATHREF stat, mean), 725 $(MATHREF stat, standardDeviation), 726 $(MATHREF stat, variance) 727 +/ 728 template zscore(F, 729 VarianceAlgo varianceAlgo = VarianceAlgo.hybrid, 730 Summation summation = Summation.appropriate) 731 { 732 /++ 733 Params: 734 slice = slice 735 isPopulation = true if population standard deviation, false is sample (default) 736 +/ 737 @fmamath auto zscore(Iterator, size_t N, SliceKind kind)( 738 Slice!(Iterator, N, kind) slice, 739 bool isPopulation = false) 740 { 741 import core.lifetime: move; 742 import mir.math.common: sqrt; 743 import mir.math.sum: ResolveSummationType; 744 import mir.stat.descriptive.univariate: meanType, VarianceAccumulator; 745 746 alias G = meanType!F; 747 alias T = typeof(slice); 748 auto varianceAccumulator = VarianceAccumulator!( 749 G, varianceAlgo, ResolveSummationType!(summation, T, G))( 750 slice.lightScope); 751 return scale(slice, 752 varianceAccumulator.mean, 753 varianceAccumulator.variance(isPopulation).sqrt); 754 } 755 756 /// ditto 757 @fmamath auto zscore(SliceLike)(SliceLike x, bool isPopulation = false) 758 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 759 { 760 import mir.ndslice.slice: toSlice; 761 return zscore(x.toSlice, isPopulation); 762 } 763 } 764 765 /// ditto 766 template zscore(VarianceAlgo varianceAlgo = VarianceAlgo.hybrid, 767 Summation summation = Summation.appropriate) 768 { 769 import mir.stat.descriptive.univariate: meanType; 770 771 /// ditto 772 @fmamath auto zscore(Iterator, size_t N, SliceKind kind)( 773 Slice!(Iterator, N, kind) slice, 774 bool isPopulation = false) 775 { 776 import core.lifetime: move; 777 alias F = meanType!(Slice!(Iterator, N, kind)); 778 return .zscore!(F, varianceAlgo, summation)(slice.move, isPopulation); 779 } 780 781 /// ditto 782 @fmamath auto zscore(SliceLike)(SliceLike x, bool isPopulation = false) 783 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 784 { 785 import mir.ndslice.slice: toSlice; 786 alias F = meanType!(SliceLike); 787 return .zscore!(F, varianceAlgo, summation)(x.toSlice, isPopulation); 788 } 789 } 790 791 /// ditto 792 template zscore(F, string varianceAlgo, string summation = "appropriate") 793 { 794 mixin("alias zscore = .zscore!(F, VarianceAlgo." ~ varianceAlgo ~ ", Summation." ~ summation ~ ");"); 795 } 796 797 /// ditto 798 template zscore(string varianceAlgo, string summation = "appropriate") 799 { 800 mixin("alias zscore = .zscore!(VarianceAlgo." ~ varianceAlgo ~ ", Summation." ~ summation ~ ");"); 801 } 802 803 /// zscore vector 804 version(mir_stat_test) 805 @safe pure nothrow 806 unittest 807 { 808 import mir.algorithm.iteration: all; 809 import mir.math.common: approxEqual; 810 import mir.ndslice.slice: sliced; 811 812 auto x = [1.0, 2, 3, 4, 5, 6].sliced; 813 814 assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306])); 815 assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385])); 816 } 817 818 /// zscore dynamic array 819 version(mir_stat_test) 820 @safe pure nothrow 821 unittest 822 { 823 import mir.algorithm.iteration: all; 824 import mir.math.common: approxEqual; 825 826 auto x = [1.0, 2, 3, 4, 5, 6]; 827 assert(x.zscore.all!approxEqual([-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306])); 828 assert(x.zscore(true).all!approxEqual([-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385])); 829 } 830 831 /// zscore matrix 832 version(mir_stat_test) 833 @safe pure 834 unittest 835 { 836 import mir.algorithm.iteration: all; 837 import mir.math.common: approxEqual; 838 import mir.ndslice.fuse: fuse; 839 840 auto x = [ 841 [1.0, 2, 3], 842 [4.0, 5, 6] 843 ].fuse; 844 845 assert(x.zscore.all!approxEqual([[-1.336306, -0.801784, -0.267261], [0.267261, 0.801784, 1.336306]])); 846 assert(x.zscore(true).all!approxEqual([[-1.46385, -0.87831, -0.29277], [0.29277, 0.87831, 1.46385]])); 847 } 848 849 /// Column zscore matrix 850 version(mir_stat_test) 851 @safe pure 852 unittest 853 { 854 import mir.algorithm.iteration: all, equal; 855 import mir.math.common: approxEqual; 856 import mir.ndslice.fuse: fuse; 857 import mir.ndslice.topology: alongDim, byDim, map; 858 859 auto x = [ 860 [20.0, 100.0, 2000.0], 861 [10.0, 5.0, 2.0] 862 ].fuse; 863 864 auto result = [ 865 [ 0.707107, 0.707107, 0.707107], 866 [-0.707107, -0.707107, -0.707107] 867 ].fuse; 868 869 // Use byDim with map to scale by row/column. 870 auto xZScoreByDim = x.byDim!1.map!zscore; 871 auto resultByDim = result.byDim!1; 872 assert(xZScoreByDim.equal!(equal!approxEqual)(resultByDim)); 873 874 auto xZScoreAlongDim = x.alongDim!0.map!zscore; 875 auto resultAlongDim = result.alongDim!0; 876 assert(xZScoreAlongDim.equal!(equal!approxEqual)(resultAlongDim)); 877 } 878 879 /// Can control how `mean` and `standardDeviation` are calculated and output type 880 version(mir_stat_test) 881 @safe pure nothrow 882 unittest 883 { 884 import mir.algorithm.iteration: all; 885 import mir.math.common: approxEqual; 886 import mir.ndslice.slice: sliced; 887 import mir.ndslice.topology: repeat; 888 889 //Set sum algorithm or output type 890 auto a = [1, 1e100, 1, -1e100]; 891 892 auto x = a.sliced * 10_000; 893 894 auto result = [6.123724e-101, 1.224745, 6.123724e-101, -1.224745].sliced; 895 896 assert(x.zscore!("online", "kbn").all!approxEqual(result)); 897 assert(x.zscore!("online", "kb2").all!approxEqual(result)); 898 assert(x.zscore!("online", "precise").all!approxEqual(result)); 899 assert(x.zscore!(double, "online", "precise").all!approxEqual(result)); 900 901 auto y = [uint.max, uint.max / 2, uint.max / 3].sliced; 902 assert(y.zscore!ulong.all!approxEqual([1.120897, -0.320256, -0.800641])); 903 } 904 905 // zscore withAsSlice 906 version(mir_stat_test) 907 @safe pure nothrow @nogc 908 unittest 909 { 910 import mir.algorithm.iteration: all; 911 import mir.math.common: approxEqual; 912 import mir.rc.array: RCArray; 913 914 auto x = RCArray!double(6); 915 foreach(i, ref e; x) 916 e = i + 1; 917 918 static immutable result1 = [-1.336306, -0.801784, -0.267261, 0.267261, 0.801784, 1.336306]; 919 assert(x.zscore.all!approxEqual(result1)); 920 static immutable result2 = [-1.46385, -0.87831, -0.29277, 0.29277, 0.87831, 1.46385]; 921 assert(x.zscore(true).all!approxEqual(result2)); 922 } 923 924 /++ 925 Scales input using robust statistics. 926 927 This function centers the input using the `median` and then `scale`s the data 928 according to the quantile range defined by (`low_quartile`, 1 - `low_quartile`). 929 By default, it uses the interquartile range, whereby `low_quartile` equals 0.25. 930 931 Params: 932 F = controls type of output 933 quantileAlgo = algorithm for calculating quantile (default: `QuantileAlgo.type7`) 934 allowModifySlice = controls whether the input is modified in place, default is false 935 Returns: 936 The robust scaled input 937 See_also: 938 $(LREF scale), 939 $(SUB2REF univariate, median), 940 $(SUB2REF univariate, quantile), 941 $(SUB2REF univariate, interquartileRange) 942 +/ 943 template robustScale(F, 944 QuantileAlgo quantileAlgo = QuantileAlgo.type7, 945 bool allowModifySlice = false) 946 { 947 /++ 948 Params: 949 slice = slice 950 low_quartile = lower end of quartile range 951 +/ 952 @fmamath auto robustScale(Iterator, size_t N, SliceKind kind, T)( 953 Slice!(Iterator, N, kind) slice, 954 T low_quartile = 0.25) 955 { 956 assert(low_quartile > 0.0, "robustScale: low_quartile must be greater than zero"); 957 assert(low_quartile < 0.5, "robustScale: low_quartile must be less than 0.5"); 958 959 import mir.ndslice.topology: flattened; 960 import mir.stat.descriptive.univariate: median, meanType, quantile, quantileType; 961 962 static if (!allowModifySlice) { 963 import mir.ndslice.allocation: rcslice; 964 import mir.ndslice.topology: as; 965 import std.traits: Unqual; 966 967 auto view = slice.lightScope; 968 auto val = view.as!(Unqual!(slice.DeepElement)).rcslice; 969 auto temp = val.lightScope.flattened; 970 } else { 971 auto temp = slice.flattened; 972 } 973 974 quantileType!(F, quantileAlgo) low_quartile_value = temp.quantile!(F, quantileAlgo, allowModifySlice, false)(low_quartile); 975 meanType!F median_value = temp.median!(F, allowModifySlice); 976 quantileType!(F, quantileAlgo) high_quartile_value = temp.quantile!(F, quantileAlgo, allowModifySlice, false)(cast(F) 1 - low_quartile); 977 978 static if (allowModifySlice) { 979 return scale(temp, median_value, cast(meanType!F) (high_quartile_value - low_quartile_value)); 980 } else { 981 return scale(slice, median_value, cast(meanType!F) (high_quartile_value - low_quartile_value)); 982 } 983 } 984 985 /// ditto 986 @fmamath auto robustScale(SliceLike)(SliceLike x, F low_quartile = cast(F) 0.25) 987 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 988 { 989 import mir.ndslice.slice: toSlice; 990 return robustScale(x.toSlice, low_quartile); 991 } 992 } 993 994 /++ 995 Params: 996 quantileAlgo = algorithm for calculating quantile (default: `QuantileAlgo.type7`) 997 allowModifySlice = controls whether the input is modified in place, default is false 998 +/ 999 template robustScale(QuantileAlgo quantileAlgo = QuantileAlgo.type7, 1000 bool allowModifySlice = false) 1001 { 1002 import mir.stat.descriptive.univariate: meanType; 1003 1004 /++ 1005 Params: 1006 slice = slice 1007 low_quartile = lower end of quartile range 1008 +/ 1009 @fmamath auto robustScale(Iterator, size_t N, SliceKind kind)( 1010 Slice!(Iterator, N, kind) slice, 1011 double low_quartile = 0.25) 1012 { 1013 import core.lifetime: move; 1014 alias F = meanType!(Slice!(Iterator, N, kind)); 1015 return .robustScale!(F, quantileAlgo, allowModifySlice)(slice.move, cast(F) low_quartile); 1016 } 1017 1018 /// ditto 1019 @fmamath auto robustScale(SliceLike)(SliceLike x, double low_quartile = 0.25) 1020 if (isConvertibleToSlice!SliceLike && !isSlice!SliceLike) 1021 { 1022 import mir.ndslice.slice: toSlice; 1023 alias F = meanType!(SliceLike); 1024 return .robustScale!(F, quantileAlgo, allowModifySlice)(x.toSlice, cast(F) low_quartile); 1025 } 1026 } 1027 1028 /// ditto 1029 template robustScale(F, string quantileAlgo, bool allowModifySlice = false) 1030 { 1031 mixin("alias robustScale = .robustScale!(F, QuantileAlgo." ~ quantileAlgo ~ ", allowModifySlice);"); 1032 } 1033 1034 /// ditto 1035 template robustScale(string quantileAlgo, bool allowModifySlice = false) 1036 { 1037 mixin("alias robustScale = .robustScale!(QuantileAlgo." ~ quantileAlgo ~ ", allowModifySlice);"); 1038 } 1039 1040 /// ditto 1041 template robustScale(bool allowModifySlice) 1042 { 1043 mixin("alias robustScale = .robustScale!(QuantileAlgo.type7, allowModifySlice);"); 1044 } 1045 1046 /// robustScale vector 1047 version(mir_stat_test) 1048 @safe pure nothrow 1049 unittest 1050 { 1051 import mir.algorithm.iteration: all, findIndex; 1052 import mir.math.common: approxEqual; 1053 import mir.ndslice.slice: sliced; 1054 1055 static immutable input = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100]; 1056 auto x = input.dup.sliced; 1057 auto y = x.robustScale; 1058 1059 assert(y.all!approxEqual([14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000])); 1060 assert(x.robustScale(0.15).all!approxEqual([8.02752, 0.321101, -0.0458716, 0.0458716, 0.229358, -0.0458716, 0.321101, -0.321101, -0.87156, -10.3211])); 1061 1062 // When allowModifySlice = true, this modifies both the original input and 1063 // the order of the output 1064 auto yCopy = y.idup; 1065 auto z = x.robustScale!true; 1066 size_t j; 1067 foreach(i, ref e; input) { 1068 j = x.findIndex!(a => a == e); 1069 assert(z[j].approxEqual(yCopy[i])); 1070 } 1071 } 1072 1073 /// robustScale dynamic array 1074 version(mir_stat_test) 1075 @safe pure nothrow 1076 unittest 1077 { 1078 import mir.algorithm.iteration: all; 1079 import mir.math.common: approxEqual; 1080 1081 auto x = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100]; 1082 assert(x.robustScale.all!approxEqual([14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000])); 1083 } 1084 1085 /// robustScale matrix 1086 version(mir_stat_test) 1087 @safe pure 1088 unittest 1089 { 1090 import mir.algorithm.iteration: all; 1091 import mir.math.common: approxEqual; 1092 import mir.ndslice.fuse: fuse; 1093 1094 auto x = [ 1095 [100.0, 16, 12, 13, 15], 1096 [ 12.0, 16, 9, 3, -100] 1097 ].fuse; 1098 1099 assert(x.robustScale.all!approxEqual([[14.583333, 0.583333, -0.083333, 0.083333, 0.416667], [-0.083333, 0.583333, -0.583333, -1.583333, -18.750000]])); 1100 } 1101 1102 /// Column robustScale matrix 1103 version(mir_stat_test) 1104 @safe pure 1105 unittest 1106 { 1107 import mir.algorithm.iteration: all, equal; 1108 import mir.math.common: approxEqual; 1109 import mir.ndslice.fuse: fuse; 1110 import mir.ndslice.topology: alongDim, byDim, map; 1111 1112 auto x = [ 1113 [100.0, 16, 12, 13, 15], 1114 [ 12.0, 16, 9, 3, -100] 1115 ].fuse; 1116 1117 auto result = [ 1118 [28.333333, 0.333333, -1.0, -0.666667, 0.0], 1119 [ 0.333333, 0.777778, 0.0, -0.666667, -12.111111] 1120 ].fuse; 1121 1122 // Use byDim with map to scale by row/column. 1123 auto xRobustScaleByDim = x.byDim!0.map!robustScale; 1124 auto resultByDim = result.byDim!0; 1125 assert(xRobustScaleByDim.equal!(equal!approxEqual)(resultByDim)); 1126 1127 auto xRobustScaleAlongDim = x.alongDim!1.map!robustScale; 1128 auto resultAlongDim = result.alongDim!1; 1129 assert(xRobustScaleAlongDim.equal!(equal!approxEqual)(resultAlongDim)); 1130 } 1131 1132 /// Can control `QuantileAlgo` and output type 1133 version(mir_stat_test) 1134 @safe pure nothrow 1135 unittest 1136 { 1137 import mir.algorithm.iteration: all; 1138 import mir.math.common: approxEqual; 1139 import mir.ndslice.slice: sliced; 1140 import mir.ndslice.topology: repeat; 1141 1142 //Set `QuantileAlgo` algorithm or output type 1143 auto x = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100].sliced; 1144 1145 assert(x.robustScale!("type9").all!approxEqual([11.864407, 0.474576, -0.0677966, 0.0677966, 0.338983, -0.0677966, 0.474576, -0.474576, -1.288136, -15.254237])); 1146 assert(x.robustScale!("type1").all!approxEqual([12.500000, 0.500000, -0.0714286, 0.0714286, 0.357143, -0.0714286, 0.500000, -0.500000, -1.357143, -16.071429])); 1147 assert(x.robustScale!(float, "type6").all!approxEqual([10.294118f, 0.411765f, -0.0588235f, 0.0588235f, 0.294118f, -0.0588235f, 0.411765f, -0.411765f, -1.117647f, -13.235294f])); 1148 1149 auto y = [uint.max, uint.max / 2, uint.max / 3].sliced; 1150 assert(y.robustScale!"type1".all!approxEqual([0.75, 0, -0.25])); 1151 1152 auto z = [ulong.max, ulong.max / 2, ulong.max / 3].sliced; 1153 assert(z.robustScale!(ulong, "type1").all!approxEqual([0.75, 0, -0.25])); 1154 } 1155 1156 // robustScale withAsSlice 1157 version(mir_stat_test) 1158 @safe pure nothrow @nogc 1159 unittest 1160 { 1161 import mir.algorithm.iteration: all; 1162 import mir.math.common: approxEqual; 1163 import mir.rc.array: RCArray; 1164 1165 static immutable value = [100.0, 16, 12, 13, 15, 12, 16, 9, 3, -100]; 1166 1167 auto x = RCArray!double(10); 1168 foreach(i, ref e; x) 1169 e = value[i]; 1170 1171 static immutable result = [14.583333, 0.583333, -0.083333, 0.083333, 0.416667, -0.083333, 0.583333, -0.583333, -1.583333, -18.750000]; 1172 assert(x.robustScale.all!approxEqual(result)); 1173 }