1 /++ 2 This module contains algorithms for multivariate descriptive statistics. 3 4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 5 6 Authors: John Michael Hall 7 8 Copyright: 2023 Mir Stat Authors. 9 10 Macros: 11 SUBREF = $(REF_ALTTEXT $(TT $2), $2, mir, stat, $1)$(NBSP) 12 MATHREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, math, $1)$(NBSP) 13 MATHREF_ALT = $(GREF_ALTTEXT mir-algorithm, $(B $(TT $2)), $2, mir, math, $1)$(NBSP) 14 NDSLICEREF = $(GREF_ALTTEXT mir-algorithm, $(TT $2), $2, mir, ndslice, $1)$(NBSP) 15 T2=$(TR $(TDNW $(LREF $1)) $(TD $+)) 16 T3=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3)) 17 T4=$(TR $(TDNW $(LREF $1)) $(TD $2) $(TD $3) $(TD $4)) 18 19 +/ 20 21 module mir.stat.descriptive.multivariate; 22 23 import mir.internal.utility: isFloatingPoint; 24 import mir.math.sum: Summation, Summator; 25 import std.traits: isMutable; 26 27 private void putter3(Slices, T, U, Summation summation1, Summation summation2, Summation summation3) 28 (scope Slices slices, ref Summator!(T, summation1) seed1, ref Summator!(U, summation2) seed2, ref Summator!(U, summation3) seed3) 29 { 30 import mir.functional: Tuple; 31 static if (is(Slices == Tuple!(V1, V2, V3), V1, V2, V3)) { 32 seed1.put(slices[0]); 33 seed2.put(slices[1]); 34 seed3.put(slices[2]); 35 } else { 36 import mir.ndslice.internal: frontOfDim; 37 do 38 { 39 frontOfDim!(0, slices)[0].putter3(seed1, seed2, seed3); 40 slices.popFront; 41 } 42 while(!slices.empty); 43 } 44 } 45 46 /++ 47 Covariance algorithms. 48 49 See Also: 50 $(WEB en.wikipedia.org/wiki/Algorithms_for_calculating_variance, Algorithms for calculating variance). 51 +/ 52 enum CovarianceAlgo 53 { 54 /++ 55 Performs Welford's online algorithm for updating covariance. While it only 56 iterates each input once, it can be slower for smaller inputs. However, it 57 is also more accurate. Can also `put` another CovarianceAccumulator of the 58 same type, which uses the parallel algorithm from Chan et al. 59 +/ 60 online, 61 62 /++ 63 Calculates covariance using E(x*y) - E(x)*E(y) (alowing for adjustments for 64 population/sample variance). This algorithm can be numerically unstable. 65 +/ 66 naive, 67 68 /++ 69 Calculates covariance using a two-pass algorithm whereby the inputs are first 70 centered and then the sum of products is calculated from that. May be faster 71 than `online` and generally more accurate than the `naive` algorithm. 72 +/ 73 twoPass, 74 75 /++ 76 Calculates covariance assuming the mean of the inputs is zero. 77 +/ 78 assumeZeroMean, 79 80 /++ 81 When slices, slice-like objects, or ranges are the inputs, uses the two-pass 82 algorithm. When an individual data-point is added, uses the online algorithm. 83 +/ 84 hybrid 85 } 86 87 /// 88 struct CovarianceAccumulator(T, CovarianceAlgo covarianceAlgo, Summation summation) 89 if (isMutable!T && covarianceAlgo == CovarianceAlgo.naive) 90 { 91 import mir.math.sum: elementType, Summator; 92 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 93 import mir.primitives: isInputRange, front, empty, popFront; 94 95 /// 96 private size_t _count; 97 /// 98 alias S = Summator!(T, summation); 99 /// 100 S summatorLeft; 101 /// 102 S summatorRight; 103 /// 104 S summatorOfProducts; 105 106 /// 107 this(RangeX, RangeY)(RangeX x, RangeY y) 108 if (isInputRange!RangeX && 109 isInputRange!RangeY) 110 { 111 import core.lifetime: move; 112 this.put(x.move, y.move); 113 } 114 115 /// 116 void put(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 117 Slice!(IteratorX, 1, kindX) x, 118 Slice!(IteratorY, 1, kindY) y 119 ) 120 in 121 { 122 assert(x.length == y.length, 123 "CovarianceAcumulator.put: both vectors must have the same length"); 124 } 125 do 126 { 127 import mir.ndslice.topology: zip, map; 128 129 _count += x.length; 130 summatorLeft.put(x); 131 summatorRight.put(y); 132 summatorOfProducts.put(x.zip(y).map!"a * b"); 133 } 134 135 /// 136 void put(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 137 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 138 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 139 { 140 import mir.ndslice.slice: toSlice; 141 this.put(x.toSlice, y.toSlice); 142 } 143 144 /// 145 void put(RangeX, RangeY)(RangeX x, RangeY y) 146 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && is(elementType!RangeX : T) && 147 isInputRange!RangeY && !isConvertibleToSlice!RangeY && is(elementType!RangeY : T)) 148 { 149 do 150 { 151 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 152 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 153 this.put(x.front, y.front); 154 x.popFront; 155 y.popFront; 156 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 157 } 158 159 /// 160 void put()(T x, T y) 161 { 162 _count++; 163 summatorLeft.put(x); 164 summatorRight.put(y); 165 summatorOfProducts.put(x * y); 166 } 167 168 /// 169 void put(U, Summation sumAlgo)(CovarianceAccumulator!(U, covarianceAlgo, sumAlgo) v) 170 { 171 _count += v.count; 172 summatorLeft.put(v.sumLeft!U); 173 summatorRight.put(v.sumRight!U); 174 summatorOfProducts.put(v.sumOfProducts!U); 175 } 176 177 const: 178 179 /// 180 size_t count() @property 181 { 182 return _count; 183 } 184 /// 185 F sumLeft(F = T)() @property 186 { 187 return cast(F) summatorLeft.sum; 188 } 189 /// 190 F sumRight(F = T)() @property 191 { 192 return cast(F) summatorRight.sum; 193 } 194 /// 195 F meanLeft(F = T)() @property 196 { 197 return sumLeft!F / count; 198 } 199 /// 200 F meanRight(F = T)() @property 201 { 202 return sumRight!F / count; 203 } 204 /// 205 F sumOfProducts(F = T)() @property 206 { 207 return cast(F) summatorOfProducts.sum; 208 } 209 /// 210 F centeredSumOfProducts(F = T)() @property 211 { 212 return sumOfProducts!F - sumLeft!F * sumRight!F / count; 213 } 214 /// 215 F covariance(F = T)(bool isPopulation) @property 216 in (count + isPopulation > 1, "More data points required") 217 { 218 return sumOfProducts!F / (count + isPopulation - 1) - 219 (sumLeft!F * sumRight!F) * (F(1) / (count * (count + isPopulation - 1))); 220 } 221 } 222 223 /// 224 version(mir_stat_test) 225 @safe pure nothrow 226 unittest 227 { 228 import mir.math.sum: Summation; 229 import mir.ndslice.slice: sliced; 230 import mir.test: shouldApprox; 231 232 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 233 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 234 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 235 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 236 237 CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive) v; 238 v.put(x, y); 239 240 v.covariance(true).shouldApprox == 82.25 / 12 - (29.25 * 36) / (12 * 12); 241 v.covariance(false).shouldApprox == 82.25 / 11 - (29.25 * 36) / (12 * 12) * (12.0 / 11); 242 243 v.put(4.0, 3.0); 244 v.covariance(true).shouldApprox == 94.25 / 13 - (33.25 * 39) / (13 * 13); 245 v.covariance(false).shouldApprox == 94.25 / 12 - (33.25 * 39) / (13 * 13) * (13.0 / 12); 246 } 247 248 // Check dynamic array 249 version(mir_stat_test) 250 @safe pure nothrow 251 unittest 252 { 253 import mir.math.sum: Summation; 254 import mir.ndslice.slice: sliced; 255 import mir.test: shouldApprox; 256 257 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 258 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 259 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 260 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 261 262 CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive) v; 263 v.put(x, y); 264 265 v.covariance(true).shouldApprox == 82.25 / 12 - (29.25 * 36) / (12 * 12); 266 v.covariance(false).shouldApprox == 82.25 / 11 - (29.25 * 36) / (12 * 12) * (12.0 / 11); 267 268 v.meanLeft.shouldApprox == 2.4375; 269 v.meanRight.shouldApprox == 3; 270 271 v.put(4.0, 3.0); 272 v.covariance(true).shouldApprox == 94.25 / 13 - (33.25 * 39) / (13 * 13); 273 v.covariance(false).shouldApprox == 94.25 / 12 - (33.25 * 39) / (13 * 13) * (13.0 / 12); 274 } 275 276 // rcslice test 277 version(mir_stat_test) 278 @safe pure nothrow @nogc 279 unittest 280 { 281 import mir.math.sum: Summation; 282 import mir.ndslice.allocation: mininitRcslice; 283 import mir.test: shouldApprox; 284 285 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 286 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 287 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 288 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 289 auto x = mininitRcslice!double(12); 290 auto y = mininitRcslice!double(12); 291 x[] = a; 292 y[] = b; 293 auto v = CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive)(x, y); 294 295 v.covariance(true).shouldApprox == -5.5 / 12; 296 v.covariance(false).shouldApprox == -5.5 / 11; 297 } 298 299 // Check adding CovarianceAccumultors 300 version(mir_stat_test) 301 @safe pure nothrow 302 unittest 303 { 304 import mir.math.sum: sum, Summation; 305 import mir.ndslice.slice: sliced; 306 import mir.test: shouldApprox; 307 308 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 309 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 310 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 311 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 312 313 CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive) v1; 314 v1.put(x1, y1); 315 CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive) v2; 316 v2.put(x2, y2); 317 v1.put(v2); 318 319 v1.covariance(true).shouldApprox == -5.5 / 12; 320 v1.covariance(false).shouldApprox == -5.5 / 11; 321 } 322 323 // Test input range 324 version(mir_stat_test) 325 @safe pure nothrow 326 unittest 327 { 328 import mir.math.sum: Summation; 329 import mir.test: should; 330 import std.range: iota; 331 332 auto x = iota(0, 5); 333 auto y = iota(-3, 2); 334 CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive) v; 335 v.put(x, y); 336 v.covariance(true).should == 10.0 / 5; 337 } 338 339 /// 340 struct CovarianceAccumulator(T, CovarianceAlgo covarianceAlgo, Summation summation) 341 if (isFloatingPoint!T && isMutable!T && covarianceAlgo == CovarianceAlgo.online) 342 { 343 import mir.math.sum: elementType, Summator; 344 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 345 import mir.primitives: isInputRange, front, empty, popFront; 346 347 /// 348 private size_t _count; 349 /// 350 alias S = Summator!(T, summation); 351 /// 352 S summatorLeft; 353 /// 354 S summatorRight; 355 /// 356 S centeredSummatorOfProducts; 357 358 /// 359 this(RangeX, RangeY)(RangeX x, RangeY y) 360 if (isInputRange!RangeX && isInputRange!RangeY) 361 { 362 import core.lifetime: move; 363 this.put(x.move, y.move); 364 } 365 366 /// 367 this()(T x, T y) 368 { 369 this.put(x, y); 370 } 371 372 /// 373 void put(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 374 Slice!(IteratorX, 1, kindX) x, 375 Slice!(IteratorY, 1, kindY) y 376 ) 377 in 378 { 379 assert(x.length == y.length, 380 "CovarianceAcumulator.put: both vectors must have the same length"); 381 } 382 do 383 { 384 import mir.ndslice.topology: zip; 385 386 foreach(e; x.zip(y)) { 387 this.put(e[0], e[1]); 388 } 389 } 390 391 /// 392 void put(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 393 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 394 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 395 { 396 import mir.ndslice.slice: toSlice; 397 this.put(x.toSlice, y.toSlice); 398 } 399 400 /// 401 void put(RangeX, RangeY)(RangeX x, RangeY y) 402 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && 403 isInputRange!RangeY && !isConvertibleToSlice!RangeY) 404 { 405 import std.range: zip; 406 foreach(a, b; zip(x, y)) { 407 this.put(a, b); 408 } 409 } 410 411 /// 412 void put()(T x, T y) 413 { 414 T delta = x; 415 if (count > 0) { 416 delta -= meanLeft; 417 } 418 _count++; 419 summatorLeft.put(x); 420 summatorRight.put(y); 421 centeredSummatorOfProducts.put(delta * (y - meanRight)); 422 } 423 424 /// 425 void put(U, CovarianceAlgo covAlgo, Summation sumAlgo)(CovarianceAccumulator!(U, covAlgo, sumAlgo) v) 426 if (covAlgo != CovarianceAlgo.assumeZeroMean) 427 { 428 size_t oldCount = count; 429 T deltaLeft = v.meanLeft; 430 T deltaRight = v.meanRight; 431 if (count > 0) { 432 deltaLeft -= meanLeft!T; 433 deltaRight -= meanRight!T; 434 } 435 _count += v.count; 436 summatorLeft.put(v.sumLeft!T); 437 summatorRight.put(v.sumRight!T); 438 centeredSummatorOfProducts.put(v.centeredSumOfProducts!T + deltaLeft * deltaRight * v.count * oldCount / count); 439 } 440 441 const: 442 443 /// 444 size_t count() @property 445 { 446 return _count; 447 } 448 /// 449 F sumLeft(F = T)() @property 450 { 451 return cast(F) summatorLeft.sum; 452 } 453 /// 454 F sumRight(F = T)() @property 455 { 456 return cast(F) summatorRight.sum; 457 } 458 /// 459 F meanLeft(F = T)() @property 460 { 461 return sumLeft!F / count; 462 } 463 /// 464 F meanRight(F = T)() @property 465 { 466 return sumRight!T / count; 467 } 468 /// 469 F centeredSumOfProducts(F = T)() @property 470 { 471 return cast(F) centeredSummatorOfProducts.sum; 472 } 473 /// 474 F covariance(F = T)(bool isPopulation) @property 475 in (count + isPopulation > 1, "More data points required") 476 { 477 return centeredSumOfProducts!F / (count + isPopulation - 1); 478 } 479 } 480 481 /// 482 version(mir_stat_test) 483 @safe pure nothrow 484 unittest 485 { 486 import mir.math.sum: Summation; 487 import mir.ndslice.slice: sliced; 488 import mir.test: shouldApprox; 489 490 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 491 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 492 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 493 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 494 495 CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive) v; 496 v.put(x, y); 497 498 v.covariance(true).shouldApprox == -5.5 / 12; 499 v.covariance(false).shouldApprox == -5.5 / 11; 500 501 v.put(4.0, 3.0); 502 v.covariance(true).shouldApprox == -5.5 / 13; 503 v.covariance(false).shouldApprox == -5.5 / 12; 504 } 505 506 // Check dynamic array 507 version(mir_stat_test) 508 @safe pure nothrow 509 unittest 510 { 511 import mir.math.sum: Summation; 512 import mir.ndslice.slice: sliced; 513 import mir.test: shouldApprox; 514 515 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 516 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 517 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 518 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 519 520 CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive) v; 521 v.put(x, y); 522 523 v.covariance(true).shouldApprox == -5.5 / 12; 524 v.covariance(false).shouldApprox == -5.5 / 11; 525 526 v.meanLeft.shouldApprox == 2.4375; 527 v.meanRight.shouldApprox == 3; 528 529 v.put(4.0, 3.0); 530 v.covariance(true).shouldApprox == -5.5 / 13; 531 v.covariance(false).shouldApprox == -5.5 / 12; 532 } 533 534 // rcslice test 535 version(mir_stat_test) 536 @safe pure nothrow @nogc 537 unittest 538 { 539 import mir.math.sum: Summation; 540 import mir.ndslice.allocation: mininitRcslice; 541 import mir.test: shouldApprox; 542 543 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 544 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 545 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 546 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 547 auto x = mininitRcslice!double(12); 548 auto y = mininitRcslice!double(12); 549 x[] = a; 550 y[] = b; 551 auto v = CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive)(x, y); 552 553 v.covariance(true).shouldApprox == -5.5 / 12; 554 v.covariance(false).shouldApprox == -5.5 / 11; 555 } 556 557 // Check adding CovarianceAccumultors 558 version(mir_stat_test) 559 @safe pure nothrow 560 unittest 561 { 562 import mir.math.sum: sum, Summation; 563 import mir.ndslice.slice: sliced; 564 import mir.test: shouldApprox; 565 566 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 567 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 568 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 569 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 570 571 CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive) v1; 572 v1.put(x1, y1); 573 CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive) v2; 574 v2.put(x2, y2); 575 v1.put(v2); 576 577 v1.covariance(true).shouldApprox == -5.5 / 12; 578 v1.covariance(false).shouldApprox == -5.5 / 11; 579 } 580 581 // Check adding CovarianceAccumultors (naive) 582 version(mir_stat_test) 583 @safe pure nothrow 584 unittest 585 { 586 import mir.math.sum: sum, Summation; 587 import mir.ndslice.slice: sliced; 588 import mir.test: shouldApprox; 589 590 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 591 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 592 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 593 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 594 595 CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive) v1; 596 v1.put(x1, y1); 597 CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive) v2; 598 v2.put(x2, y2); 599 v1.put(v2); 600 601 v1.covariance(true).shouldApprox == -5.5 / 12; 602 v1.covariance(false).shouldApprox == -5.5 / 11; 603 } 604 605 // Check adding CovarianceAccumultors (twoPass) 606 version(mir_stat_test) 607 @safe pure nothrow 608 unittest 609 { 610 import mir.math.sum: sum, Summation; 611 import mir.ndslice.slice: sliced; 612 import mir.test: shouldApprox; 613 614 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 615 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 616 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 617 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 618 619 CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive) v1; 620 v1.put(x1, y1); 621 auto v2 = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x2, y2); 622 v1.put(v2); 623 624 v1.covariance(true).shouldApprox == -5.5 / 12; 625 v1.covariance(false).shouldApprox == -5.5 / 11; 626 } 627 628 // Initializing with one point 629 version(mir_stat_test) 630 @safe pure nothrow 631 unittest 632 { 633 import mir.math.sum: Summation; 634 import mir.test: should; 635 636 auto v = CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive)(4.0, 3.0); 637 v.covariance(true).should == 0; 638 } 639 640 // Test input range 641 version(mir_stat_test) 642 @safe pure nothrow 643 unittest 644 { 645 import mir.math.sum: Summation; 646 import mir.test: should; 647 import std.range: chunks, iota; 648 649 auto x = iota(0, 5); 650 auto y = iota(-3, 2); 651 CovarianceAccumulator!(double, CovarianceAlgo.online, Summation.naive) v; 652 v.put(x, y); 653 v.covariance(true).should == 10 / 5; 654 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v2; 655 v2.put(x.chunks(1), y.chunks(1)); 656 v2.covariance(true).should == 10 / 5; 657 } 658 659 /// 660 struct CovarianceAccumulator(T, CovarianceAlgo covarianceAlgo, Summation summation) 661 if (isMutable!T && covarianceAlgo == CovarianceAlgo.twoPass) 662 { 663 import mir.functional: naryFun; 664 import mir.math.sum: elementType, Summator; 665 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 666 import mir.primitives: isInputRange, front, empty, popFront; 667 import mir.stat.descriptive.univariate: MeanAccumulator; 668 669 /// 670 private size_t _count; 671 /// 672 alias S = Summator!(T, summation); 673 /// 674 private S summatorLeft; 675 /// 676 private S summatorRight; 677 /// 678 private S centeredSummatorOfProducts; 679 680 /// 681 this(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 682 Slice!(IteratorX, 1, kindX) x, Slice!(IteratorY, 1, kindY) y) 683 in 684 { 685 assert(x.length == y.length, 686 "CovarianceAcumulator.put: both vectors must have the same length"); 687 } 688 do 689 { 690 import mir.ndslice.internal: LeftOp; 691 import mir.ndslice.topology: map, vmap, zip; 692 693 _count = x.length; 694 summatorLeft.put(x.lightScope); 695 summatorRight.put(y.lightScope); 696 centeredSummatorOfProducts.put(x.vmap(LeftOp!("-", T)(meanLeft)).zip(y.vmap(LeftOp!("-", T)(meanRight))).map!(naryFun!"a * b")); 697 } 698 699 /// 700 this(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 701 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 702 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 703 { 704 import mir.ndslice.slice: toSlice; 705 this(x.toSlice, y.toSlice); 706 } 707 708 /// 709 this(RangeX, RangeY)(RangeX x, RangeY y) 710 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && is(elementType!RangeX : T) && 711 isInputRange!RangeY && !isConvertibleToSlice!RangeY && is(elementType!RangeY : T)) 712 { 713 import mir.primitives: elementCount, hasShape; 714 715 static if (hasShape!RangeX && hasShape!RangeY) { 716 assert(x.elementCount == y.elementCount); 717 _count += x.elementCount; 718 summatorLeft.put(x); 719 summatorRight.put(y); 720 } else { 721 import std.range: zip; 722 723 foreach(a, b; zip(x, y)) { 724 _count++; 725 summatorLeft.put(a); 726 summatorRight.put(b); 727 } 728 } 729 730 T xMean = meanLeft; 731 T yMean = meanRight; 732 do 733 { 734 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 735 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 736 centeredSummatorOfProducts.put((x.front - xMean) * (y.front - yMean)); 737 x.popFront; 738 y.popFront; 739 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 740 } 741 742 /// 743 this()(T x, T y) 744 { 745 _count++; 746 summatorLeft.put(x); 747 summatorRight.put(y); 748 centeredSummatorOfProducts.put(0); 749 } 750 751 const: 752 753 /// 754 size_t count() @property 755 { 756 return _count; 757 } 758 /// 759 F sumLeft(F = T)() @property 760 { 761 return cast(F) summatorLeft.sum; 762 } 763 /// 764 F sumRight(F = T)() @property 765 { 766 return cast(F) summatorRight.sum; 767 } 768 /// 769 F meanLeft(F = T)() @property 770 { 771 return sumLeft!F / count; 772 } 773 /// 774 F meanRight(F = T)() @property 775 { 776 return sumRight!F / count; 777 } 778 /// 779 F centeredSumOfProducts(F = T)() @property 780 { 781 return cast(F) centeredSummatorOfProducts.sum; 782 } 783 /// 784 F covariance(F = T)(bool isPopulation) @property 785 in (count + isPopulation > 1, "More data points required") 786 { 787 return centeredSumOfProducts!F / (count + isPopulation - 1); 788 } 789 } 790 791 /// 792 version(mir_stat_test) 793 @safe pure nothrow 794 unittest 795 { 796 import mir.math.sum: Summation; 797 import mir.ndslice.slice: sliced; 798 import mir.test: shouldApprox; 799 800 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 801 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 802 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 803 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 804 805 auto v = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x, y); 806 807 v.covariance(true).shouldApprox == -5.5 / 12; 808 v.covariance(false).shouldApprox == -5.5 / 11; 809 } 810 811 // Check dynamic array 812 version(mir_stat_test) 813 @safe pure nothrow 814 unittest 815 { 816 import mir.math.sum: Summation; 817 import mir.ndslice.slice: sliced; 818 import mir.test: shouldApprox; 819 820 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 821 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 822 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 823 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 824 825 auto v = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x, y); 826 827 v.covariance(true).shouldApprox == -5.5 / 12; 828 v.covariance(false).shouldApprox == -5.5 / 11; 829 830 v.meanLeft.shouldApprox == 2.4375; 831 v.meanRight.shouldApprox ==3; 832 } 833 834 // rcslice test 835 version(mir_stat_test) 836 @safe pure nothrow @nogc 837 unittest 838 { 839 import mir.math.sum: Summation; 840 import mir.ndslice.allocation: mininitRcslice; 841 import mir.test: shouldApprox; 842 843 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 844 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 845 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 846 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 847 auto x = mininitRcslice!double(12); 848 auto y = mininitRcslice!double(12); 849 x[] = a; 850 y[] = b; 851 auto v = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x, y); 852 853 v.covariance(true).shouldApprox == -5.5 / 12; 854 v.covariance(false).shouldApprox == -5.5 / 11; 855 } 856 857 // Check Vmap 858 version(mir_stat_test) 859 @safe pure nothrow 860 unittest 861 { 862 import mir.math.sum: Summation; 863 import mir.ndslice.slice: sliced; 864 import mir.test: shouldApprox; 865 866 auto a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 867 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 868 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 869 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 870 auto x = a + 1; 871 auto y = b - 1; 872 873 auto v = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x, y); 874 875 v.covariance(true).shouldApprox == -5.5 / 12; 876 v.covariance(false).shouldApprox == -5.5 / 11; 877 } 878 879 // Initializing with one point 880 version(mir_stat_test) 881 @safe pure nothrow 882 unittest 883 { 884 import mir.math.sum: Summation; 885 import mir.test: should; 886 887 auto v = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(4.0, 3.0); 888 v.centeredSumOfProducts.should == 0; 889 } 890 891 // withAsSlice test 892 version(mir_stat_test) 893 @safe pure nothrow @nogc 894 unittest 895 { 896 import mir.rc.array: RCArray; 897 import mir.test: shouldApprox; 898 899 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 900 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 901 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 902 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 903 904 auto x = RCArray!double(12); 905 foreach(i, ref e; x) 906 e = a[i]; 907 auto y = RCArray!double(12); 908 foreach(i, ref e; y) 909 e = b[i]; 910 911 auto v = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x, y); 912 v.covariance(true).shouldApprox == -5.5 / 12; 913 v.covariance(false).shouldApprox == -5.5 / 11; 914 } 915 916 // Test input range 917 version(mir_stat_test) 918 @safe pure nothrow 919 unittest 920 { 921 import mir.math.sum: Summation; 922 import mir.test: should; 923 import std.range: iota; 924 import std.algorithm: map; 925 926 auto x1 = iota(0, 5); 927 auto y1 = iota(-3, 2); 928 auto v1 = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x1, y1); 929 v1.covariance(true).should == 10.0 / 5; 930 931 // this version can't use elementCount 932 auto x2 = x1.map!(a => 2 * a); 933 auto y2 = y1.map!(a => 2 * a); 934 auto v2 = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x2, y2); 935 v2.covariance(true).should == 40.0 / 5; 936 } 937 938 /// 939 struct CovarianceAccumulator(T, CovarianceAlgo covarianceAlgo, Summation summation) 940 if (isMutable!T && covarianceAlgo == CovarianceAlgo.assumeZeroMean) 941 { 942 import mir.math.sum: elementType, Summator; 943 import mir.ndslice.slice: Slice, SliceKind, hasAsSlice, isConvertibleToSlice, isSlice; 944 import mir.primitives: isInputRange, front, empty, popFront; 945 946 private size_t _count; 947 948 /// 949 Summator!(T, summation) centeredSummatorOfProducts; 950 951 /// 952 this(RangeX, RangeY)(RangeX x, RangeY y) 953 if (isInputRange!RangeX && isInputRange!RangeY) 954 { 955 this.put(x, y); 956 } 957 958 /// 959 this()(T x, T y) 960 { 961 this.put(x, y); 962 } 963 964 /// 965 void put(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 966 Slice!(IteratorX, 1, kindX) x, 967 Slice!(IteratorY, 1, kindY) y 968 ) 969 in 970 { 971 assert(x.length == y.length, 972 "CovarianceAcumulator.put: both vectors must have the same length"); 973 } 974 do 975 { 976 import mir.ndslice.topology: zip, map; 977 978 _count += x.length; 979 centeredSummatorOfProducts.put(x.zip(y).map!"a * b"); 980 } 981 982 /// 983 void put(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 984 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 985 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 986 { 987 import mir.ndslice.slice: toSlice; 988 this.put(x.toSlice, y.toSlice); 989 } 990 991 /// 992 void put(RangeX, RangeY)(RangeX x, RangeY y) 993 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && is(elementType!RangeX : T) && 994 isInputRange!RangeY && !isConvertibleToSlice!RangeY && is(elementType!RangeY : T)) 995 { 996 do 997 { 998 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 999 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 1000 this.put(x.front, y.front); 1001 x.popFront; 1002 y.popFront; 1003 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 1004 } 1005 1006 /// 1007 void put()(T x, T y) 1008 { 1009 _count++; 1010 centeredSummatorOfProducts.put(x * y); 1011 } 1012 1013 /// 1014 void put(U, Summation sumAlgo)(CovarianceAccumulator!(U, covarianceAlgo, sumAlgo) v) 1015 { 1016 _count += v.count; 1017 centeredSummatorOfProducts.put(v.centeredSumOfProducts!T); 1018 } 1019 1020 const: 1021 1022 /// 1023 size_t count() @property 1024 { 1025 return _count; 1026 } 1027 /// 1028 F sumLeft(F = T)() @property 1029 { 1030 return 0; 1031 } 1032 /// 1033 F sumRight(F = T)() @property 1034 { 1035 return 0; 1036 } 1037 /// 1038 F meanLeft(F = T)() @property 1039 { 1040 return 0; 1041 } 1042 /// 1043 F meanRight(F = T)() @property 1044 { 1045 return 0; 1046 } 1047 /// 1048 F centeredSumOfProducts(F = T)() @property 1049 { 1050 return cast(F) centeredSummatorOfProducts.sum; 1051 } 1052 /// 1053 F covariance(F = T)(bool isPopulation) @property 1054 in (count + isPopulation > 1, "More data points required") 1055 { 1056 return centeredSumOfProducts!F / (count + isPopulation - 1); 1057 } 1058 } 1059 1060 /// 1061 version(mir_stat_test) 1062 @safe pure nothrow 1063 unittest 1064 { 1065 import mir.stat.transform: center; 1066 import mir.math.sum: Summation; 1067 import mir.ndslice.slice: sliced; 1068 import mir.test: shouldApprox; 1069 1070 auto a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1071 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1072 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1073 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1074 auto x = a.center; 1075 auto y = b.center; 1076 1077 CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive) v; 1078 v.put(x, y); 1079 1080 v.covariance(true).shouldApprox == -5.5 / 12; 1081 v.covariance(false).shouldApprox == -5.5 / 11; 1082 1083 v.put(4.0, 3.0); 1084 v.covariance(true).shouldApprox == 6.5 / 13; 1085 v.covariance(false).shouldApprox == 6.5 / 12; 1086 } 1087 1088 // Check dynamic array 1089 version(mir_stat_test) 1090 @safe pure nothrow 1091 unittest 1092 { 1093 import mir.math.sum: Summation; 1094 import mir.ndslice.slice: sliced; 1095 import mir.stat.descriptive.univariate: mean; 1096 import mir.test: should, shouldApprox; 1097 1098 auto a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1099 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 1100 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1101 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 1102 auto aMean = a.mean; 1103 auto bMean = b.mean; 1104 auto x = a.dup; 1105 auto y = b.dup; 1106 for (size_t i; i < a.length; i++) { 1107 x[i] -= aMean; 1108 y[i] -= bMean; 1109 } 1110 1111 CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive) v; 1112 v.put(x, y); 1113 1114 v.covariance(true).shouldApprox == -5.5 / 12; 1115 v.covariance(false).shouldApprox == -5.5 / 11; 1116 1117 v.put(4.0, 3.0); 1118 v.covariance(true).shouldApprox == 6.5 / 13; 1119 v.covariance(false).shouldApprox == 6.5 / 12; 1120 1121 v.meanLeft.should == 0; 1122 v.meanRight.should == 0; 1123 } 1124 1125 // rcslice test 1126 version(mir_stat_test) 1127 @safe pure nothrow @nogc 1128 unittest 1129 { 1130 import mir.math.sum: Summation; 1131 import mir.ndslice.allocation: mininitRcslice; 1132 import mir.stat.transform: center; 1133 import mir.test: shouldApprox; 1134 1135 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1136 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 1137 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1138 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 1139 auto x = mininitRcslice!double(12); 1140 auto y = mininitRcslice!double(12); 1141 x[] = a; 1142 y[] = b; 1143 auto v = CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive)(x.center, y.center); 1144 1145 v.covariance(true).shouldApprox == -5.5 / 12; 1146 v.covariance(false).shouldApprox == -5.5 / 11; 1147 } 1148 1149 // Check adding CovarianceAccumultors 1150 version(mir_stat_test) 1151 @safe pure nothrow 1152 unittest 1153 { 1154 import mir.math.sum: sum, Summation; 1155 import mir.ndslice.slice: sliced; 1156 import mir.test: shouldApprox; 1157 1158 auto a1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 1159 auto b1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 1160 auto a2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1161 auto b2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1162 auto meanA = (a1.sum + a2.sum) / 12; 1163 auto meanB = (b1.sum + b2.sum) / 12; 1164 auto x1 = a1 - meanA; 1165 auto y1 = b1 - meanB; 1166 auto x2 = a2 - meanA; 1167 auto y2 = b2 - meanB; 1168 1169 CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive) v1; 1170 v1.put(x1, y1); 1171 CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive) v2; 1172 v2.put(x2, y2); 1173 v1.put(v2); 1174 1175 v1.covariance(true).shouldApprox == -5.5 / 12; 1176 v1.covariance(false).shouldApprox == -5.5 / 11; 1177 } 1178 1179 // Initializing with one point 1180 version(mir_stat_test) 1181 @safe pure nothrow 1182 unittest 1183 { 1184 import mir.math.sum: Summation; 1185 import mir.test: should; 1186 1187 auto v = CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive)(4.0, 3.0); 1188 v.centeredSumOfProducts.should == 12; 1189 } 1190 1191 // Test input range 1192 version(mir_stat_test) 1193 @safe pure nothrow 1194 unittest 1195 { 1196 import mir.math.sum: Summation; 1197 import mir.test: should; 1198 import std.range: iota; 1199 1200 auto x = iota(0, 5); 1201 auto y = iota(-3, 2); 1202 auto v = CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive)(x, y); 1203 v.centeredSumOfProducts.should == 0; 1204 } 1205 1206 /// 1207 struct CovarianceAccumulator(T, CovarianceAlgo covarianceAlgo, Summation summation) 1208 if (isFloatingPoint!T && isMutable!T && covarianceAlgo == CovarianceAlgo.hybrid) 1209 { 1210 import mir.functional: naryFun; 1211 import mir.math.sum: elementType, Summator; 1212 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 1213 import mir.primitives: isInputRange, front, empty, popFront; 1214 1215 /// 1216 private size_t _count; 1217 /// 1218 alias S = Summator!(T, summation); 1219 /// 1220 S summatorLeft; 1221 /// 1222 S summatorRight; 1223 /// 1224 S centeredSummatorOfProducts; 1225 1226 /// 1227 this()(T x, T y) 1228 { 1229 this.put(x, y); 1230 } 1231 1232 /// 1233 this(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 1234 Slice!(IteratorX, 1, kindX) x, 1235 Slice!(IteratorY, 1, kindY) y 1236 ) 1237 in 1238 { 1239 assert(x.length == y.length, 1240 "CovarianceAcumulator.put: both vectors must have the same length"); 1241 } 1242 do 1243 { 1244 import mir.ndslice.internal: LeftOp; 1245 import mir.ndslice.topology: map, vmap, zip; 1246 1247 _count += x.length; 1248 summatorLeft.put(x.lightScope); 1249 summatorRight.put(y.lightScope); 1250 centeredSummatorOfProducts.put(x.vmap(LeftOp!("-", T)(meanLeft)).zip(y.vmap(LeftOp!("-", T)(meanRight))).map!(naryFun!"a * b")); 1251 } 1252 1253 /// 1254 this(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 1255 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 1256 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 1257 { 1258 import mir.ndslice.slice: toSlice; 1259 this(x.toSlice, y.toSlice); 1260 } 1261 1262 /// 1263 this(RangeX, RangeY)(RangeX x, RangeY y) 1264 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && 1265 isInputRange!RangeY && !isConvertibleToSlice!RangeY) 1266 { 1267 static if (is(elementType!RangeX : T) && is(elementType!RangeY : T)) { 1268 import mir.primitives: elementCount, hasShape; 1269 1270 static if (hasShape!RangeX && hasShape!RangeY) { 1271 assert(x.elementCount == y.elementCount); 1272 _count += x.elementCount; 1273 summatorLeft.put(x); 1274 summatorRight.put(y); 1275 } else { 1276 import std.range: zip; 1277 1278 foreach(a, b; zip(x, y)) { 1279 _count++; 1280 summatorLeft.put(a); 1281 summatorRight.put(b); 1282 } 1283 } 1284 1285 T xMean = meanLeft; 1286 T yMean = meanRight; 1287 do 1288 { 1289 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 1290 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 1291 centeredSummatorOfProducts.put((x.front - xMean) * (y.front - yMean)); 1292 x.popFront; 1293 y.popFront; 1294 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 1295 } else { 1296 this.put(x, y); 1297 } 1298 } 1299 1300 /// 1301 void put(RangeX, RangeY)(RangeX x, RangeY y) 1302 if (isInputRange!RangeX && isInputRange!RangeY) 1303 { 1304 static if (is(elementType!RangeX : T) && is(elementType!RangeY : T)) { 1305 auto v = typeof(this)(x, y); 1306 this.put(v); 1307 } else { 1308 import std.range: zip; 1309 foreach(a, b; zip(x, y)) { 1310 this.put(a, b); 1311 } 1312 } 1313 } 1314 1315 /// 1316 void put()(T x, T y) 1317 { 1318 T delta = x; 1319 if (count > 0) { 1320 delta -= meanLeft; 1321 } 1322 _count++; 1323 summatorLeft.put(x); 1324 summatorRight.put(y); 1325 centeredSummatorOfProducts.put(delta * (y - meanRight)); 1326 } 1327 1328 /// 1329 void put(U, CovarianceAlgo covAlgo, Summation sumAlgo)(CovarianceAccumulator!(U, covAlgo, sumAlgo) v) 1330 { 1331 size_t oldCount = count; 1332 T deltaLeft = v.meanLeft!T; 1333 T deltaRight = v.meanRight!T; 1334 if (oldCount > 0) { 1335 deltaLeft -= meanLeft; 1336 deltaRight -= meanRight; 1337 } 1338 _count += v.count; 1339 summatorLeft.put(v.sumLeft!T); 1340 summatorRight.put(v.sumRight!T); 1341 centeredSummatorOfProducts.put(v.centeredSumOfProducts!T + deltaLeft * deltaRight * v.count * oldCount / count); 1342 } 1343 1344 const: 1345 1346 /// 1347 size_t count() @property 1348 { 1349 return _count; 1350 } 1351 /// 1352 F sumLeft(F = T)() @property 1353 { 1354 return cast(F) summatorLeft.sum; 1355 } 1356 /// 1357 F sumRight(F = T)() @property 1358 { 1359 return cast(F) summatorRight.sum; 1360 } 1361 /// 1362 F meanLeft(F = T)() @property 1363 { 1364 return sumLeft!F / count; 1365 } 1366 /// 1367 F meanRight(F = T)() @property 1368 { 1369 return sumRight!F / count; 1370 } 1371 /// 1372 F centeredSumOfProducts(F = T)() @property 1373 { 1374 return cast(F) centeredSummatorOfProducts.sum; 1375 } 1376 /// 1377 F covariance(F = T)(bool isPopulation) @property 1378 in (count + isPopulation > 1, "More data points required") 1379 { 1380 return centeredSumOfProducts!F / (count + isPopulation - 1); 1381 } 1382 } 1383 1384 /// 1385 version(mir_stat_test) 1386 @safe pure nothrow 1387 unittest 1388 { 1389 import mir.math.sum: Summation; 1390 import mir.ndslice.slice: sliced; 1391 import mir.test: shouldApprox; 1392 1393 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1394 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1395 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1396 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1397 1398 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v; 1399 v.put(x, y); 1400 1401 v.covariance(true).shouldApprox == -5.5 / 12; 1402 v.covariance(false).shouldApprox == -5.5 / 11; 1403 1404 v.put(4.0, 3.0); 1405 v.covariance(true).shouldApprox == -5.5 / 13; 1406 v.covariance(false).shouldApprox == -5.5 / 12; 1407 } 1408 1409 // Check dynamic array 1410 version(mir_stat_test) 1411 @safe pure nothrow 1412 unittest 1413 { 1414 import mir.math.sum: Summation; 1415 import mir.ndslice.slice: sliced; 1416 import mir.test: shouldApprox; 1417 1418 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1419 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 1420 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1421 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 1422 1423 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v; 1424 v.put(x, y); 1425 1426 v.covariance(true).shouldApprox == -5.5 / 12; 1427 v.covariance(false).shouldApprox == -5.5 / 11; 1428 1429 v.meanLeft.shouldApprox == 2.4375; 1430 v.meanRight.shouldApprox == 3; 1431 1432 v.put(4.0, 3.0); 1433 v.covariance(true).shouldApprox == -5.5 / 13; 1434 v.covariance(false).shouldApprox == -5.5 / 12; 1435 } 1436 1437 // rcslice test 1438 version(mir_stat_test) 1439 @safe pure nothrow @nogc 1440 unittest 1441 { 1442 import mir.math.sum: Summation; 1443 import mir.ndslice.allocation: mininitRcslice; 1444 import mir.test: shouldApprox; 1445 1446 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1447 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 1448 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1449 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 1450 auto x = mininitRcslice!double(12); 1451 auto y = mininitRcslice!double(12); 1452 x[] = a; 1453 y[] = b; 1454 auto v = CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive)(x, y); 1455 1456 v.covariance(true).shouldApprox == -5.5 / 12; 1457 v.covariance(false).shouldApprox == -5.5 / 11; 1458 } 1459 1460 // Check adding CovarianceAccumultors 1461 version(mir_stat_test) 1462 @safe pure nothrow 1463 unittest 1464 { 1465 import mir.math.sum: sum, Summation; 1466 import mir.ndslice.slice: sliced; 1467 import mir.test: shouldApprox; 1468 1469 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 1470 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 1471 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1472 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1473 1474 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v1; 1475 v1.put(x1, y1); 1476 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v2; 1477 v2.put(x2, y2); 1478 v1.put(v2); 1479 1480 v1.covariance(true).shouldApprox == -5.5 / 12; 1481 v1.covariance(false).shouldApprox == -5.5 / 11; 1482 } 1483 1484 // Check adding CovarianceAccumultors (naive) 1485 version(mir_stat_test) 1486 @safe pure nothrow 1487 unittest 1488 { 1489 import mir.math.sum: sum, Summation; 1490 import mir.ndslice.slice: sliced; 1491 import mir.test: shouldApprox; 1492 1493 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 1494 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 1495 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1496 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1497 1498 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v1; 1499 v1.put(x1, y1); 1500 CovarianceAccumulator!(double, CovarianceAlgo.naive, Summation.naive) v2; 1501 v2.put(x2, y2); 1502 v1.put(v2); 1503 1504 v1.covariance(true).shouldApprox == -5.5 / 12; 1505 v1.covariance(false).shouldApprox == -5.5 / 11; 1506 } 1507 1508 // Check adding CovarianceAccumultors (twoPass) 1509 version(mir_stat_test) 1510 @safe pure nothrow 1511 unittest 1512 { 1513 import mir.math.sum: sum, Summation; 1514 import mir.ndslice.slice: sliced; 1515 import mir.test: shouldApprox; 1516 1517 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 1518 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 1519 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1520 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1521 1522 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v1; 1523 v1.put(x1, y1); 1524 auto v2 = CovarianceAccumulator!(double, CovarianceAlgo.twoPass, Summation.naive)(x2, y2); 1525 v1.put(v2); 1526 1527 v1.covariance(true).shouldApprox == -5.5 / 12; 1528 v1.covariance(false).shouldApprox == -5.5 / 11; 1529 } 1530 1531 // Check adding CovarianceAccumultors (assumeZeroMean) 1532 version(mir_stat_test) 1533 @safe pure nothrow 1534 unittest 1535 { 1536 import mir.math.sum: sum, Summation; 1537 import mir.ndslice.slice: sliced; 1538 import mir.stat.transform: center; 1539 import mir.test: shouldApprox; 1540 1541 auto a1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 1542 auto b1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 1543 auto a2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1544 auto b2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1545 auto x1 = a1.center; 1546 auto y1 = b1.center; 1547 auto x2 = a2.center; 1548 auto y2 = b2.center; 1549 1550 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v1; 1551 v1.put(x1, y1); 1552 auto v2 = CovarianceAccumulator!(double, CovarianceAlgo.assumeZeroMean, Summation.naive)(x2, y2); 1553 v1.put(v2); 1554 1555 v1.covariance(true).shouldApprox == -1.9375 / 12; //note: different from above due to inconsistent centering 1556 v1.covariance(false).shouldApprox == -1.9375 / 11; //note: different from above due to inconsistent centering 1557 } 1558 1559 // Initializing with one point 1560 version(mir_stat_test) 1561 @safe pure nothrow 1562 unittest 1563 { 1564 import mir.math.sum: Summation; 1565 import mir.test: should; 1566 1567 auto v = CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive)(4.0, 3.0); 1568 v.covariance(true).should == 0; 1569 } 1570 1571 // Test input range 1572 version(mir_stat_test) 1573 @safe pure nothrow 1574 unittest 1575 { 1576 import mir.math.sum: Summation; 1577 import mir.test: should; 1578 import std.algorithm: map; 1579 import std.range: chunks, iota; 1580 1581 auto x1 = iota(0, 5); 1582 auto y1 = iota(-3, 2); 1583 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v1; 1584 v1.put(x1, y1); 1585 v1.covariance(true).should == 10.0 / 5; 1586 // this version can't use elementCount 1587 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v2; 1588 auto x2 = x1.map!(a => 2 * a); 1589 auto y2 = y1.map!(a => 2 * a); 1590 v2.put(x2, y2); 1591 v2.covariance(true).should == 40.0 / 5; 1592 CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive) v3; 1593 v3.put(x1.chunks(1), y1.chunks(1)); 1594 v3.covariance(true).should == 10.0 / 5; 1595 auto v4 = CovarianceAccumulator!(double, CovarianceAlgo.hybrid, Summation.naive)(x1.chunks(1), y1.chunks(1)); 1596 v4.covariance(true).should == 10.0 / 5; 1597 } 1598 1599 /++ 1600 Calculates the covariance of the inputs. 1601 1602 If `x` and `y` are both slices or convertible to slices, then they must be 1603 one-dimensional. 1604 1605 By default, if `F` is not floating point type, then the result will have a 1606 `double` type if `F` is implicitly convertible to a floating point type. 1607 1608 Params: 1609 F = controls type of output 1610 covarianceAlgo = algorithm for calculating covariance (default: CovarianceAlgo.hybrid) 1611 summation = algorithm for calculating sums (default: Summation.appropriate) 1612 Returns: 1613 The covariance of the inputs 1614 +/ 1615 template covariance(F, 1616 CovarianceAlgo covarianceAlgo = CovarianceAlgo.hybrid, 1617 Summation summation = Summation.appropriate) 1618 if (isFloatingPoint!F) 1619 { 1620 import mir.math.common: fmamath; 1621 import mir.primitives: isInputRange; 1622 import mir.math.sum: ResolveSummationType; 1623 /++ 1624 Params: 1625 x = range, must be finite iterable 1626 y = range, must be finite iterable 1627 isPopulation = true if population covariance, false if sample covariance (default) 1628 +/ 1629 @fmamath F covariance(RangeX, RangeY)(RangeX x, RangeY y, bool isPopulation = false) 1630 if (isInputRange!RangeX && isInputRange!RangeY) 1631 { 1632 import core.lifetime: move; 1633 1634 auto covarianceAccumulator = CovarianceAccumulator!(F, covarianceAlgo, ResolveSummationType!(summation, RangeX, F))(x.move, y.move); 1635 return covarianceAccumulator.covariance(isPopulation); 1636 } 1637 } 1638 1639 /// ditto 1640 template covariance( 1641 CovarianceAlgo covarianceAlgo = CovarianceAlgo.hybrid, 1642 Summation summation = Summation.appropriate) 1643 { 1644 import mir.math.common: fmamath; 1645 import mir.primitives: isInputRange; 1646 import mir.stat.descriptive.univariate: meanType; 1647 import std.traits: CommonType; 1648 /++ 1649 Params: 1650 x = range, must be finite iterable 1651 y = range, must be finite iterable 1652 isPopulation = true if population covariance, false if sample covariance (default) 1653 +/ 1654 @fmamath CommonType!(meanType!RangeX, meanType!RangeY) covariance(RangeX, RangeY)(RangeX x, RangeY y, bool isPopulation = false) 1655 if (isInputRange!RangeX && isInputRange!RangeY) 1656 { 1657 import core.lifetime: move; 1658 1659 alias F = typeof(return); 1660 return .covariance!(F, covarianceAlgo, summation)(x.move, y.move, isPopulation); 1661 } 1662 } 1663 1664 /// ditto 1665 template covariance(F, string covarianceAlgo, string summation = "appropriate") 1666 { 1667 mixin("alias covariance = .covariance!(F, CovarianceAlgo." ~ covarianceAlgo ~ ", Summation." ~ summation ~ ");"); 1668 } 1669 1670 /// ditto 1671 template covariance(string covarianceAlgo, string summation = "appropriate") 1672 { 1673 mixin("alias covariance = .covariance!(CovarianceAlgo." ~ covarianceAlgo ~ ", Summation." ~ summation ~ ");"); 1674 } 1675 1676 /// Covariance of vectors 1677 version(mir_stat_test) 1678 @safe pure nothrow 1679 unittest 1680 { 1681 import mir.ndslice.slice: sliced; 1682 import mir.test: shouldApprox; 1683 1684 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1685 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1686 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1687 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1688 1689 x.covariance(y, true).shouldApprox == -5.5 / 12; 1690 x.covariance(y).shouldApprox == -5.5 / 11; 1691 } 1692 1693 /// Can also set algorithm type 1694 version(mir_stat_test) 1695 @safe pure nothrow 1696 unittest 1697 { 1698 import mir.math.common: approxEqual; 1699 import mir.ndslice.slice: sliced; 1700 import mir.test: shouldApprox; 1701 1702 auto a = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1703 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 1704 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1705 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 1706 1707 auto x = a + 10.0 ^^ 9; 1708 auto y = b + 10.0 ^^ 9; 1709 1710 x.covariance(y).shouldApprox == -5.5 / 11; 1711 1712 // The naive algorithm is numerically unstable in this case 1713 assert(!x.covariance!"naive"(y).approxEqual(-5.5 / 11)); 1714 1715 // The two-pass algorithm provides the same answer as hybrid 1716 x.covariance!"twoPass"(y).shouldApprox == -5.5 / 11; 1717 1718 // And the assumeZeroMean algorithm is way off 1719 assert(!x.covariance!"assumeZeroMean"(y).approxEqual(-5.5 / 11)); 1720 } 1721 1722 /// Can also set algorithm or output type 1723 version(mir_stat_test) 1724 @safe pure nothrow 1725 unittest 1726 { 1727 import mir.ndslice.slice: sliced; 1728 import mir.ndslice.topology: repeat; 1729 import mir.test: shouldApprox; 1730 1731 //Set population covariance, covariance algorithm, sum algorithm or output type 1732 1733 auto a = [1.0, 1e100, 1, -1e100].sliced; 1734 auto b = [1.0e100, 1, 1, -1e100].sliced; 1735 auto x = a * 10_000; 1736 auto y = b * 10_000; 1737 1738 /++ 1739 Due to Floating Point precision, when centering `x`, subtracting the mean 1740 from the second and fourth numbers has no effect (for `y` the same is true 1741 for the first and fourth). Further, after centering and multiplying `x` and 1742 `y`, the third numbers in the slice has precision too low to be included in 1743 the centered sum of the products. 1744 +/ 1745 x.covariance(y).shouldApprox == 1.0e208 / 3; 1746 x.covariance(y, true).shouldApprox == 1.0e208 / 4; 1747 1748 x.covariance!("online")(y).shouldApprox == 1.0e208 / 3; 1749 x.covariance!("online", "kbn")(y).shouldApprox == 1.0e208 / 3; 1750 x.covariance!("online", "kb2")(y).shouldApprox == 1.0e208 / 3; 1751 x.covariance!("online", "precise")(y).shouldApprox == 1.0e208 / 3; 1752 x.covariance!(double, "online", "precise")(y).shouldApprox == 1.0e208 / 3; 1753 1754 auto z = uint.max.repeat(3); 1755 z.covariance!float(z).shouldApprox == 0.0; 1756 static assert(is(typeof(z.covariance!float(z)) == float)); 1757 } 1758 1759 /++ 1760 For integral slices, pass output type as template parameter to ensure output 1761 type is correct. 1762 +/ 1763 version(mir_stat_test) 1764 @safe pure nothrow 1765 unittest 1766 { 1767 import mir.ndslice.slice: sliced; 1768 import mir.test: shouldApprox; 1769 1770 auto x = [0, 1, 1, 2, 4, 4, 1771 2, 7, 5, 1, 2, 0].sliced; 1772 auto y = [6, 3, 7, 1, 1, 1, 1773 9, 5, 3, 1, 3, 7].sliced; 1774 1775 x.covariance(y).shouldApprox == -18.583333 / 11; 1776 static assert(is(typeof(x.covariance(y)) == double)); 1777 1778 x.covariance!float(y).shouldApprox == -18.583333 / 11; 1779 static assert(is(typeof(x.covariance!float(y)) == float)); 1780 } 1781 1782 // make sure works with dynamic array 1783 version(mir_stat_test) 1784 @safe pure nothrow 1785 unittest 1786 { 1787 import mir.test: shouldApprox; 1788 1789 double[] x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1790 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 1791 double[] y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1792 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 1793 x.covariance(y).shouldApprox == -5.5 / 11; 1794 } 1795 1796 /// Works with @nogc 1797 version(mir_stat_test) 1798 @safe pure nothrow @nogc 1799 unittest 1800 { 1801 import mir.ndslice.allocation: mininitRcslice; 1802 import mir.test: shouldApprox; 1803 1804 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 1805 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 1806 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 1807 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 1808 auto x = mininitRcslice!double(12); 1809 auto y = mininitRcslice!double(12); 1810 x[] = a; 1811 y[] = b; 1812 1813 x.covariance(y, true).shouldApprox == -5.5 / 12; 1814 x.covariance(y).shouldApprox == -5.5 / 11; 1815 } 1816 1817 // compile with dub test --build=unittest-perf --config=unittest-perf --compiler=ldc2 1818 version(mir_stat_test_cov_performance) 1819 unittest 1820 { 1821 import mir.math.sum: Summation; 1822 import mir.math.internal.benchmark; 1823 import std.stdio: writeln; 1824 import std.traits: EnumMembers; 1825 1826 template staticMap(alias fun, alias S, args...) 1827 { 1828 import std.meta: AliasSeq; 1829 alias staticMap = AliasSeq!(); 1830 static foreach (arg; args) 1831 staticMap = AliasSeq!(staticMap, fun!(double, arg, S)); 1832 } 1833 1834 size_t n = 10_000; 1835 size_t m = 1_000; 1836 1837 alias S = Summation.fast; 1838 alias E = EnumMembers!CovarianceAlgo; 1839 alias fs = staticMap!(covariance, S, E); 1840 double[fs.length] output; 1841 1842 auto e = [E]; 1843 auto time = benchmarkRandom2!(fs)(n, m, output); 1844 writeln("Covariance performance test"); 1845 foreach (size_t i; 0 .. fs.length) { 1846 writeln("Function ", i + 1, ", Algo: ", e[i], ", Output: ", output[i], ", Elapsed time: ", time[i]); 1847 } 1848 writeln(); 1849 } 1850 1851 /++ 1852 Correlation algorithms. 1853 1854 See Also: 1855 $(LREF CovarianceAlgo) 1856 $(WEB en.wikipedia.org/wiki/Algorithms_for_calculating_variance, Algorithms for calculating variance). 1857 +/ 1858 enum CorrelationAlgo 1859 { 1860 /++ 1861 Performs Welford's online algorithm for updating correlation. While it only 1862 iterates each input once, it can be slower for smaller inputs. However, it 1863 is also more accurate. Can also `put` another CorrelationAccumulator of the 1864 same type, which uses the parallel algorithm from Chan et al. 1865 +/ 1866 online, 1867 1868 /++ 1869 Calculates correlation using (E(x*y) - E(x)*E(y))/(sqrt(E(x^2)-E(x)^2)*sqrt(E(y^2)-E(y)^2)) (alowing for adjustments for 1870 population/sample variance). This algorithm can be numerically unstable. 1871 +/ 1872 naive, 1873 1874 /++ 1875 Calculates correlation using a two-pass algorithm whereby the inputs are first 1876 centered and then the sum of products is calculated from that. May be faster 1877 than `online` and generally more accurate than the `naive` algorithm. 1878 +/ 1879 twoPass, 1880 1881 /++ 1882 Calculates correlation assuming the mean of the inputs is zero. 1883 +/ 1884 assumeZeroMean, 1885 1886 /++ 1887 Calculates correlation assuming the mean of the inputs is zero and standard 1888 deviation is one. 1889 +/ 1890 assumeStandardized, 1891 1892 /++ 1893 When slices, slice-like objects, or ranges are the inputs, uses the two-pass 1894 algorithm. When an individual data-point is added, uses the online algorithm. 1895 +/ 1896 hybrid 1897 } 1898 1899 /// 1900 struct CorrelationAccumulator(T, CorrelationAlgo correlationAlgo, Summation summation) 1901 if (isMutable!T && correlationAlgo == CorrelationAlgo.naive) 1902 { 1903 import mir.math.sum: elementType, Summator; 1904 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 1905 import mir.primitives: isInputRange, front, empty, popFront; 1906 1907 /// 1908 private size_t _count; 1909 /// 1910 alias S = Summator!(T, summation); 1911 /// 1912 S summatorLeft; 1913 /// 1914 S summatorRight; 1915 /// 1916 S summatorOfProducts; 1917 /// 1918 S summatorOfSquaresLeft; 1919 /// 1920 S summatorOfSquaresRight; 1921 1922 /// 1923 this(RangeX, RangeY)(RangeX x, RangeY y) 1924 if (isInputRange!RangeX && 1925 isInputRange!RangeY) 1926 { 1927 import core.lifetime: move; 1928 this.put(x.move, y.move); 1929 } 1930 1931 /// 1932 void put(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 1933 Slice!(IteratorX, 1, kindX) x, 1934 Slice!(IteratorY, 1, kindY) y 1935 ) 1936 in 1937 { 1938 assert(x.length == y.length, 1939 "CorrelationAcumulator.put: both vectors must have the same length"); 1940 } 1941 do 1942 { 1943 import mir.ndslice.topology: zip, map; 1944 1945 _count += x.length; 1946 summatorLeft.put(x); 1947 summatorRight.put(y); 1948 summatorOfProducts.put(x.zip(y).map!"a * b"); 1949 summatorOfSquaresLeft.put(x * x); 1950 summatorOfSquaresRight.put(y * y); 1951 } 1952 1953 /// 1954 void put(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 1955 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 1956 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 1957 { 1958 import mir.ndslice.slice: toSlice; 1959 this.put(x.toSlice, y.toSlice); 1960 } 1961 1962 /// 1963 void put(RangeX, RangeY)(RangeX x, RangeY y) 1964 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && is(elementType!RangeX : T) && 1965 isInputRange!RangeY && !isConvertibleToSlice!RangeY && is(elementType!RangeY : T)) 1966 { 1967 do 1968 { 1969 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 1970 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 1971 this.put(x.front, y.front); 1972 x.popFront; 1973 y.popFront; 1974 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 1975 } 1976 1977 /// 1978 void put()(T x, T y) 1979 { 1980 _count++; 1981 summatorLeft.put(x); 1982 summatorRight.put(y); 1983 summatorOfProducts.put(x * y); 1984 summatorOfSquaresLeft.put(x * x); 1985 summatorOfSquaresRight.put(y * y); 1986 } 1987 1988 /// 1989 void put(U, Summation sumAlgo)(CorrelationAccumulator!(U, correlationAlgo, sumAlgo) v) 1990 { 1991 _count += v.count; 1992 summatorLeft.put(v.sumLeft!U); 1993 summatorRight.put(v.sumRight!U); 1994 summatorOfProducts.put(v.sumOfProducts!U); 1995 summatorOfSquaresLeft.put(v.sumOfSquaresLeft!U); 1996 summatorOfSquaresRight.put(v.sumOfSquaresRight!U); 1997 } 1998 1999 const: 2000 2001 /// 2002 size_t count() @property 2003 { 2004 return _count; 2005 } 2006 /// 2007 F sumLeft(F = T)() @property 2008 { 2009 return cast(F) summatorLeft.sum; 2010 } 2011 /// 2012 F sumRight(F = T)() @property 2013 { 2014 return cast(F) summatorRight.sum; 2015 } 2016 /// 2017 F meanLeft(F = T)() @property 2018 { 2019 return sumLeft!F / count; 2020 } 2021 /// 2022 F meanRight(F = T)() @property 2023 { 2024 return sumRight!F / count; 2025 } 2026 /// 2027 F sumOfProducts(F = T)() @property 2028 { 2029 return cast(F) summatorOfProducts.sum; 2030 } 2031 /// 2032 F sumOfSquaresLeft(F = T)() @property 2033 { 2034 return cast(F) summatorOfSquaresLeft.sum; 2035 } 2036 /// 2037 F sumOfSquaresRight(F = T)() @property 2038 { 2039 return cast(F) summatorOfSquaresRight.sum; 2040 } 2041 /// 2042 F centeredSumOfProducts(F = T)() @property 2043 { 2044 return sumOfProducts!F - sumLeft!F * sumRight!F / count; 2045 } 2046 /// 2047 F centeredSumOfSquaresLeft(F = T)() @property 2048 { 2049 return sumOfSquaresLeft!F - count * meanLeft!F * meanLeft!F; 2050 } 2051 /// 2052 F centeredSumOfSquaresRight(F = T)() @property 2053 { 2054 return sumOfSquaresRight!F - count * meanRight!F * meanRight!F; 2055 } 2056 /// 2057 F covariance(F = T)(bool isPopulation) @property 2058 { 2059 return sumOfProducts!F / (count + isPopulation - 1) - 2060 (sumLeft!F * sumRight!F) * (F(1) / (count * (count + isPopulation - 1))); 2061 } 2062 /// 2063 F correlation(F = T)() @property 2064 in (centeredSumOfSquaresLeft > 0, "`x` must have centered sum of squares larger than zero") 2065 in (centeredSumOfSquaresRight > 0, "`y` must have centered sum of squares larger than zero") 2066 { 2067 import mir.math.common: sqrt; 2068 return (count * sumOfProducts!F - sumLeft!F * sumRight!F) / 2069 (sqrt(count * sumOfSquaresLeft!F - sumLeft!F * sumLeft!F) * 2070 sqrt(count * sumOfSquaresRight!F - sumRight!F * sumRight!F)); 2071 } 2072 } 2073 2074 /// 2075 version(mir_stat_test) 2076 @safe pure nothrow 2077 unittest 2078 { 2079 import mir.math.sum: Summation; 2080 import mir.ndslice.slice: sliced; 2081 import mir.test: shouldApprox; 2082 2083 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2084 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2085 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2086 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2087 2088 CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive) v; 2089 v.put(x, y); 2090 2091 v.covariance(true).shouldApprox == 82.25 / 12 - (29.25 * 36) / (12 * 12); 2092 v.covariance(false).shouldApprox == 82.25 / 11 - (29.25 * 36) / (12 * 12) * (12.0 / 11); 2093 2094 v.correlation.shouldApprox == -0.0623684; 2095 2096 v.put(4.0, 3.0); 2097 v.covariance(true).shouldApprox == 94.25 / 13 - (33.25 * 39) / (13 * 13); 2098 v.covariance(false).shouldApprox == 94.25 / 12 - (33.25 * 39) / (13 * 13) * (13.0 / 12); 2099 2100 v.correlation.shouldApprox == -0.0611234; 2101 } 2102 2103 // Check dynamic array 2104 version(mir_stat_test) 2105 @safe pure nothrow 2106 unittest 2107 { 2108 import mir.math.sum: Summation; 2109 import mir.ndslice.slice: sliced; 2110 import mir.test: shouldApprox; 2111 2112 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2113 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 2114 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2115 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 2116 2117 CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive) v; 2118 v.put(x, y); 2119 2120 v.covariance(true).shouldApprox == 82.25 / 12 - (29.25 * 36) / (12 * 12); 2121 v.covariance(false).shouldApprox == 82.25 / 11 - (29.25 * 36) / (12 * 12) * (12.0 / 11); 2122 2123 v.meanLeft.shouldApprox == 2.4375; 2124 v.meanRight.shouldApprox == 3; 2125 2126 v.correlation.shouldApprox == -0.0623684; 2127 2128 v.put(4.0, 3.0); 2129 2130 v.covariance(true).shouldApprox == 94.25 / 13 - (33.25 * 39) / (13 * 13); 2131 v.covariance(false).shouldApprox == 94.25 / 12 - (33.25 * 39) / (13 * 13) * (13.0 / 12); 2132 2133 v.correlation.shouldApprox == -0.0611234; 2134 } 2135 2136 // rcslice test 2137 version(mir_stat_test) 2138 @safe pure nothrow @nogc 2139 unittest 2140 { 2141 import mir.math.sum: Summation; 2142 import mir.ndslice.allocation: mininitRcslice; 2143 import mir.test: shouldApprox; 2144 2145 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2146 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 2147 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2148 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 2149 auto x = mininitRcslice!double(12); 2150 auto y = mininitRcslice!double(12); 2151 x[] = a; 2152 y[] = b; 2153 auto v = CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive)(x, y); 2154 2155 v.covariance(true).shouldApprox == -5.5 / 12; 2156 v.covariance(false).shouldApprox == -5.5 / 11; 2157 v.correlation.shouldApprox == -0.0623684; 2158 } 2159 2160 // Check adding CorrelationAccumultors 2161 version(mir_stat_test) 2162 @safe pure nothrow 2163 unittest 2164 { 2165 import mir.math.sum: sum, Summation; 2166 import mir.ndslice.slice: sliced; 2167 import mir.test: shouldApprox; 2168 2169 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 2170 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 2171 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2172 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2173 2174 CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive) v1; 2175 v1.put(x1, y1); 2176 CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive) v2; 2177 v2.put(x2, y2); 2178 v1.put(v2); 2179 2180 v1.covariance(true).shouldApprox == -5.5 / 12; 2181 v1.covariance(false).shouldApprox == -5.5 / 11; 2182 v1.correlation.shouldApprox == -0.0623684; 2183 } 2184 2185 // Test input range 2186 version(mir_stat_test) 2187 @safe pure nothrow 2188 unittest 2189 { 2190 import mir.math.sum: Summation; 2191 import mir.test: should, shouldApprox; 2192 import std.range: iota; 2193 2194 auto x = iota(0, 5); 2195 auto y = iota(-3, 2); 2196 CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive) v; 2197 v.put(x, y); 2198 v.covariance(true).should == 10.0 / 5; 2199 v.correlation.shouldApprox == 1; 2200 } 2201 2202 /// 2203 struct CorrelationAccumulator(T, CorrelationAlgo correlationAlgo, Summation summation) 2204 if (isFloatingPoint!T && isMutable!T && correlationAlgo == CorrelationAlgo.online) 2205 { 2206 import mir.math.sum: elementType, Summator; 2207 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 2208 import mir.primitives: isInputRange, front, empty, popFront; 2209 2210 /// 2211 private size_t _count; 2212 /// 2213 alias S = Summator!(T, summation); 2214 /// 2215 S summatorLeft; 2216 /// 2217 S summatorRight; 2218 /// 2219 S centeredSummatorOfProducts; 2220 /// 2221 S centeredSummatorOfSquaresLeft; 2222 /// 2223 S centeredSummatorOfSquaresRight; 2224 2225 /// 2226 this(RangeX, RangeY)(RangeX x, RangeY y) 2227 if (isInputRange!RangeX && isInputRange!RangeY) 2228 { 2229 import core.lifetime: move; 2230 this.put(x.move, y.move); 2231 } 2232 2233 /// 2234 this()(T x, T y) 2235 { 2236 this.put(x, y); 2237 } 2238 2239 /// 2240 void put(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 2241 Slice!(IteratorX, 1, kindX) x, 2242 Slice!(IteratorY, 1, kindY) y 2243 ) 2244 in 2245 { 2246 assert(x.length == y.length, 2247 "CorrelationAcumulator.put: both vectors must have the same length"); 2248 } 2249 do 2250 { 2251 import mir.ndslice.topology: zip; 2252 2253 foreach(e; x.zip(y)) { 2254 this.put(e[0], e[1]); 2255 } 2256 } 2257 2258 /// 2259 void put(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 2260 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 2261 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 2262 { 2263 import mir.ndslice.slice: toSlice; 2264 this.put(x.toSlice, y.toSlice); 2265 } 2266 2267 /// 2268 void put(RangeX, RangeY)(RangeX x, RangeY y) 2269 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && 2270 isInputRange!RangeY && !isConvertibleToSlice!RangeY) 2271 { 2272 import std.range: zip; 2273 foreach(a, b; zip(x, y)) { 2274 this.put(a, b); 2275 } 2276 } 2277 2278 /// 2279 void put()(T x, T y) 2280 { 2281 T deltaX = x; 2282 T deltaY = y; 2283 if (count > 0) { 2284 deltaX -= meanLeft; 2285 deltaY -= meanRight; 2286 } 2287 _count++; 2288 summatorLeft.put(x); 2289 summatorRight.put(y); 2290 centeredSummatorOfProducts.put(deltaX * (y - meanRight)); 2291 centeredSummatorOfSquaresLeft.put(deltaX * (x - meanLeft)); 2292 centeredSummatorOfSquaresRight.put(deltaY * (y - meanRight)); 2293 } 2294 2295 /// 2296 void put(U, CorrelationAlgo covAlgo, Summation sumAlgo)(CorrelationAccumulator!(U, covAlgo, sumAlgo) v) 2297 if (!is(covAlgo == CorrelationAlgo.assumeZeroMean)) 2298 { 2299 size_t oldCount = count; 2300 T deltaLeft = v.meanLeft; 2301 T deltaRight = v.meanRight; 2302 if (count > 0) { 2303 deltaLeft -= meanLeft!T; 2304 deltaRight -= meanRight!T; 2305 } 2306 _count += v.count; 2307 summatorLeft.put(v.sumLeft!T); 2308 summatorRight.put(v.sumRight!T); 2309 centeredSummatorOfProducts.put(v.centeredSumOfProducts!T + deltaLeft * deltaRight * v.count * oldCount / count); 2310 centeredSummatorOfSquaresLeft.put(v.centeredSumOfSquaresLeft!T + deltaLeft * deltaLeft * v.count * oldCount / count); 2311 centeredSummatorOfSquaresRight.put(v.centeredSumOfSquaresRight!T + deltaRight * deltaRight * v.count * oldCount / count); 2312 } 2313 2314 const: 2315 2316 /// 2317 size_t count() @property 2318 { 2319 return _count; 2320 } 2321 /// 2322 F sumLeft(F = T)() @property 2323 { 2324 return cast(F) summatorLeft.sum; 2325 } 2326 /// 2327 F sumRight(F = T)() @property 2328 { 2329 return cast(F) summatorRight.sum; 2330 } 2331 /// 2332 F meanLeft(F = T)() @property 2333 { 2334 return sumLeft!F / count; 2335 } 2336 /// 2337 F meanRight(F = T)() @property 2338 { 2339 return sumRight!T / count; 2340 } 2341 /// 2342 F centeredSumOfProducts(F = T)() @property 2343 { 2344 return cast(F) centeredSummatorOfProducts.sum; 2345 } 2346 /// 2347 F centeredSumOfSquaresLeft(F = T)() @property 2348 { 2349 return cast(F) centeredSummatorOfSquaresLeft.sum; 2350 } 2351 /// 2352 F centeredSumOfSquaresRight(F = T)() @property 2353 { 2354 return cast(F) centeredSummatorOfSquaresRight.sum; 2355 } 2356 /// 2357 F covariance(F = T)(bool isPopulation) @property 2358 { 2359 return centeredSumOfProducts!F / (count + isPopulation - 1); 2360 } 2361 /// 2362 F correlation(F = T)() @property 2363 in (centeredSumOfSquaresLeft > 0, "`x` must have centered sum of squares larger than zero") 2364 in (centeredSumOfSquaresRight > 0, "`y` must have centered sum of squares larger than zero") 2365 { 2366 import mir.math.common: sqrt; 2367 return centeredSumOfProducts!F / (sqrt(centeredSumOfSquaresLeft!F) * sqrt(centeredSumOfSquaresRight!F)); 2368 } 2369 } 2370 2371 /// 2372 version(mir_stat_test) 2373 @safe pure nothrow 2374 unittest 2375 { 2376 import mir.math.sum: Summation; 2377 import mir.ndslice.slice: sliced; 2378 import mir.test: shouldApprox; 2379 2380 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2381 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2382 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2383 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2384 2385 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v; 2386 v.put(x, y); 2387 2388 v.covariance(true).shouldApprox == -5.5 / 12; 2389 v.covariance(false).shouldApprox == -5.5 / 11; 2390 2391 v.correlation.shouldApprox == -0.0623684; 2392 2393 v.put(4.0, 3.0); 2394 v.covariance(true).shouldApprox == -5.5 / 13; 2395 v.covariance(false).shouldApprox == -5.5 / 12; 2396 2397 v.correlation.shouldApprox == -0.0611234; 2398 } 2399 2400 // Check dynamic array 2401 version(mir_stat_test) 2402 @safe pure nothrow 2403 unittest 2404 { 2405 import mir.math.sum: Summation; 2406 import mir.ndslice.slice: sliced; 2407 import mir.test: shouldApprox; 2408 2409 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2410 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 2411 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2412 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 2413 2414 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v; 2415 v.put(x, y); 2416 2417 v.covariance(true).shouldApprox == -5.5 / 12; 2418 v.covariance(false).shouldApprox == -5.5 / 11; 2419 2420 v.meanLeft.shouldApprox == 2.4375; 2421 v.meanRight.shouldApprox == 3; 2422 2423 v.correlation.shouldApprox == -0.0623684; 2424 2425 v.put(4.0, 3.0); 2426 v.covariance(true).shouldApprox == -5.5 / 13; 2427 v.covariance(false).shouldApprox == -5.5 / 12; 2428 2429 v.correlation.shouldApprox == -0.0611234; 2430 } 2431 2432 // rcslice test 2433 version(mir_stat_test) 2434 @safe pure nothrow @nogc 2435 unittest 2436 { 2437 import mir.math.sum: Summation; 2438 import mir.ndslice.allocation: mininitRcslice; 2439 import mir.test: shouldApprox; 2440 2441 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2442 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 2443 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2444 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 2445 auto x = mininitRcslice!double(12); 2446 auto y = mininitRcslice!double(12); 2447 x[] = a; 2448 y[] = b; 2449 auto v = CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive)(x, y); 2450 2451 v.covariance(true).shouldApprox == -5.5 / 12; 2452 v.covariance(false).shouldApprox == -5.5 / 11; 2453 2454 v.correlation.shouldApprox == -0.0623684; 2455 } 2456 2457 // Check adding CorrelationAccumultors 2458 version(mir_stat_test) 2459 @safe pure nothrow 2460 unittest 2461 { 2462 import mir.math.sum: sum, Summation; 2463 import mir.ndslice.slice: sliced; 2464 import mir.test: shouldApprox; 2465 2466 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 2467 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 2468 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2469 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2470 2471 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v1; 2472 v1.put(x1, y1); 2473 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v2; 2474 v2.put(x2, y2); 2475 v1.put(v2); 2476 2477 v1.covariance(true).shouldApprox == -5.5 / 12; 2478 v1.covariance(false).shouldApprox == -5.5 / 11; 2479 2480 v1.correlation.shouldApprox == -0.0623684; 2481 } 2482 2483 // Check adding CorrelationAccumultors (naive) 2484 version(mir_stat_test) 2485 @safe pure nothrow 2486 unittest 2487 { 2488 import mir.math.sum: sum, Summation; 2489 import mir.ndslice.slice: sliced; 2490 import mir.test: shouldApprox; 2491 2492 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 2493 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 2494 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2495 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2496 2497 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v1; 2498 v1.put(x1, y1); 2499 CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive) v2; 2500 v2.put(x2, y2); 2501 v1.put(v2); 2502 2503 v1.covariance(true).shouldApprox == -5.5 / 12; 2504 v1.covariance(false).shouldApprox == -5.5 / 11; 2505 2506 v1.correlation.shouldApprox == -0.0623684; 2507 } 2508 2509 // Check adding CorrelationAccumultors (twoPass) 2510 version(mir_stat_test) 2511 @safe pure nothrow 2512 unittest 2513 { 2514 import mir.math.sum: sum, Summation; 2515 import mir.ndslice.slice: sliced; 2516 import mir.test: shouldApprox; 2517 2518 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 2519 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 2520 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2521 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2522 2523 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v1; 2524 v1.put(x1, y1); 2525 auto v2 = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x2, y2); 2526 v1.put(v2); 2527 2528 v1.covariance(true).shouldApprox == -5.5 / 12; 2529 v1.covariance(false).shouldApprox == -5.5 / 11; 2530 2531 v1.correlation.shouldApprox == -0.0623684; 2532 } 2533 2534 // Check adding CorrelationAccumultors (assumeZeroMean) 2535 version(mir_stat_test) 2536 @safe pure nothrow 2537 unittest 2538 { 2539 import mir.math.sum: sum, Summation; 2540 import mir.ndslice.slice: sliced; 2541 import mir.stat.transform: center; 2542 import mir.test: shouldApprox; 2543 2544 auto a1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 2545 auto b1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 2546 auto a2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2547 auto b2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2548 auto x1 = a1.center; 2549 auto y1 = b1.center; 2550 auto x2 = a2.center; 2551 auto y2 = b2.center; 2552 2553 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v1; 2554 v1.put(x1, y1); 2555 auto v2 = CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive)(x2, y2); 2556 v1.put(v2); 2557 2558 v1.covariance(true).shouldApprox == -1.9375 / 12; //note: different from above due to inconsistent centering 2559 v1.covariance(false).shouldApprox == -1.9375 / 11; //note: different from above due to inconsistent centering 2560 2561 v1.correlation.shouldApprox == -0.0229089; //note: different from above due to inconsistent centering 2562 } 2563 2564 // Initializing with one point 2565 version(mir_stat_test) 2566 @safe pure nothrow 2567 unittest 2568 { 2569 import mir.math.sum: Summation; 2570 import mir.test: should; 2571 2572 auto v = CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive)(4.0, 3.0); 2573 v.covariance(true).should == 0; 2574 } 2575 2576 // Test input range 2577 version(mir_stat_test) 2578 @safe pure nothrow 2579 unittest 2580 { 2581 import mir.math.sum: Summation; 2582 import mir.test: should, shouldApprox; 2583 import std.range: chunks, iota; 2584 2585 auto x = iota(0, 5); 2586 auto y = iota(-3, 2); 2587 CorrelationAccumulator!(double, CorrelationAlgo.online, Summation.naive) v; 2588 v.put(x, y); 2589 v.covariance(true).should == 10 / 5; 2590 v.correlation.shouldApprox == 1; 2591 2592 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v2; 2593 v2.put(x.chunks(1), y.chunks(1)); 2594 v2.covariance(true).should == 10 / 5; 2595 v2.correlation.shouldApprox == 1; 2596 } 2597 2598 /// 2599 struct CorrelationAccumulator(T, CorrelationAlgo correlationAlgo, Summation summation) 2600 if (isMutable!T && correlationAlgo == CorrelationAlgo.twoPass) 2601 { 2602 import mir.functional: naryFun; 2603 import mir.math.sum: elementType, Summator; 2604 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 2605 import mir.primitives: isInputRange, front, empty, popFront; 2606 import mir.stat.descriptive.univariate: MeanAccumulator; 2607 2608 /// 2609 private size_t _count; 2610 /// 2611 alias S = Summator!(T, summation); 2612 /// 2613 private S summatorLeft; 2614 /// 2615 private S summatorRight; 2616 /// 2617 private S centeredSummatorOfProducts; 2618 /// 2619 private S centeredSummatorOfSquaresLeft; 2620 /// 2621 private S centeredSummatorOfSquaresRight; 2622 2623 /// 2624 this(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 2625 Slice!(IteratorX, 1, kindX) x, Slice!(IteratorY, 1, kindY) y) 2626 in 2627 { 2628 assert(x.length == y.length, 2629 "CorrelationAcumulator.put: both vectors must have the same length"); 2630 } 2631 do 2632 { 2633 import mir.ndslice.internal: LeftOp; 2634 import mir.ndslice.topology: map, vmap, zip; 2635 2636 _count = x.length; 2637 summatorLeft.put(x.lightScope); 2638 summatorRight.put(y.lightScope); 2639 auto z = x.vmap(LeftOp!("-", T)(meanLeft)).zip(y.vmap(LeftOp!("-", T)(meanRight))).map!("a * b", "a * a", "b * b"); 2640 z.putter3(centeredSummatorOfProducts, 2641 centeredSummatorOfSquaresLeft, 2642 centeredSummatorOfSquaresRight); 2643 } 2644 2645 /// 2646 this(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 2647 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 2648 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 2649 { 2650 import mir.ndslice.slice: toSlice; 2651 this(x.toSlice, y.toSlice); 2652 } 2653 2654 /// 2655 this(RangeX, RangeY)(RangeX x, RangeY y) 2656 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && is(elementType!RangeX : T) && 2657 isInputRange!RangeY && !isConvertibleToSlice!RangeY && is(elementType!RangeY : T)) 2658 { 2659 import mir.primitives: elementCount, hasShape; 2660 2661 static if (hasShape!RangeX && hasShape!RangeY) { 2662 assert(x.elementCount == y.elementCount); 2663 _count += x.elementCount; 2664 summatorLeft.put(x); 2665 summatorRight.put(y); 2666 } else { 2667 import std.range: zip; 2668 2669 foreach(a, b; zip(x, y)) { 2670 _count++; 2671 summatorLeft.put(a); 2672 summatorRight.put(b); 2673 } 2674 } 2675 2676 T xMean = meanLeft; 2677 T yMean = meanRight; 2678 T xDeMean; 2679 T yDeMean; 2680 do 2681 { 2682 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 2683 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 2684 xDeMean = x.front - xMean; 2685 yDeMean = y.front - yMean; 2686 centeredSummatorOfProducts.put(xDeMean * yDeMean); 2687 centeredSummatorOfSquaresLeft.put(xDeMean * xDeMean); 2688 centeredSummatorOfSquaresRight.put(yDeMean * yDeMean); 2689 x.popFront; 2690 y.popFront; 2691 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 2692 } 2693 2694 /// 2695 this()(T x, T y) 2696 { 2697 _count++; 2698 summatorLeft.put(x); 2699 summatorRight.put(y); 2700 centeredSummatorOfProducts.put(0); 2701 centeredSummatorOfSquaresLeft.put(0); 2702 centeredSummatorOfSquaresRight.put(0); 2703 } 2704 2705 const: 2706 2707 /// 2708 size_t count() @property 2709 { 2710 return _count; 2711 } 2712 /// 2713 F sumLeft(F = T)() @property 2714 { 2715 return cast(F) summatorLeft.sum; 2716 } 2717 /// 2718 F sumRight(F = T)() @property 2719 { 2720 return cast(F) summatorRight.sum; 2721 } 2722 /// 2723 F meanLeft(F = T)() @property 2724 { 2725 return sumLeft!F / count; 2726 } 2727 /// 2728 F meanRight(F = T)() @property 2729 { 2730 return sumRight!F / count; 2731 } 2732 /// 2733 F centeredSumOfProducts(F = T)() @property 2734 { 2735 return cast(F) centeredSummatorOfProducts.sum; 2736 } 2737 /// 2738 F centeredSumOfSquaresLeft(F = T)() @property 2739 { 2740 return cast(F) centeredSummatorOfSquaresLeft.sum; 2741 } 2742 /// 2743 F centeredSumOfSquaresRight(F = T)() @property 2744 { 2745 return cast(F) centeredSummatorOfSquaresRight.sum; 2746 } 2747 /// 2748 F covariance(F = T)(bool isPopulation) @property 2749 { 2750 return centeredSumOfProducts!F / (count + isPopulation - 1); 2751 } 2752 /// 2753 F correlation(F = T)() @property 2754 in (centeredSumOfSquaresLeft > 0, "`x` must have centered sum of squares larger than zero") 2755 in (centeredSumOfSquaresRight > 0, "`y` must have centered sum of squares larger than zero") 2756 { 2757 import mir.math.common: sqrt; 2758 return centeredSumOfProducts!F / (sqrt(centeredSumOfSquaresLeft!F) * sqrt(centeredSumOfSquaresRight!F)); 2759 } 2760 } 2761 2762 /// 2763 version(mir_stat_test) 2764 @safe pure nothrow 2765 unittest 2766 { 2767 import mir.math.sum: Summation; 2768 import mir.ndslice.slice: sliced; 2769 import mir.test: shouldApprox; 2770 2771 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2772 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2773 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2774 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2775 2776 auto v = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x, y); 2777 2778 v.covariance(true).shouldApprox == -5.5 / 12; 2779 v.covariance(false).shouldApprox == -5.5 / 11; 2780 2781 v.correlation.shouldApprox == -0.0623684; 2782 } 2783 2784 // Check dynamic array 2785 version(mir_stat_test) 2786 @safe pure nothrow 2787 unittest 2788 { 2789 import mir.math.sum: Summation; 2790 import mir.ndslice.slice: sliced; 2791 import mir.test: shouldApprox; 2792 2793 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2794 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 2795 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2796 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 2797 2798 auto v = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x, y); 2799 2800 v.covariance(true).shouldApprox == -5.5 / 12; 2801 v.covariance(false).shouldApprox == -5.5 / 11; 2802 2803 v.meanLeft.shouldApprox == 2.4375; 2804 v.meanRight.shouldApprox ==3; 2805 2806 v.correlation.shouldApprox == -0.0623684; 2807 } 2808 2809 // rcslice test 2810 version(mir_stat_test) 2811 @safe pure nothrow @nogc 2812 unittest 2813 { 2814 import mir.math.sum: Summation; 2815 import mir.ndslice.allocation: mininitRcslice; 2816 import mir.test: shouldApprox; 2817 2818 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2819 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 2820 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2821 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 2822 auto x = mininitRcslice!double(12); 2823 auto y = mininitRcslice!double(12); 2824 x[] = a; 2825 y[] = b; 2826 auto v = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x, y); 2827 2828 v.covariance(true).shouldApprox == -5.5 / 12; 2829 v.covariance(false).shouldApprox == -5.5 / 11; 2830 2831 v.correlation.shouldApprox == -0.0623684; 2832 } 2833 2834 // Check Vmap 2835 version(mir_stat_test) 2836 @safe pure nothrow 2837 unittest 2838 { 2839 import mir.math.sum: Summation; 2840 import mir.ndslice.slice: sliced; 2841 import mir.test: shouldApprox; 2842 2843 auto a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2844 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 2845 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2846 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 2847 auto x = a + 1; 2848 auto y = b - 1; 2849 2850 auto v = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x, y); 2851 2852 v.covariance(true).shouldApprox == -5.5 / 12; 2853 v.covariance(false).shouldApprox == -5.5 / 11; 2854 2855 v.correlation.shouldApprox == -0.0623684; 2856 } 2857 2858 // Initializing with one point 2859 version(mir_stat_test) 2860 @safe pure nothrow 2861 unittest 2862 { 2863 import mir.math.sum: Summation; 2864 import mir.test: should; 2865 2866 auto v = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(4.0, 3.0); 2867 v.centeredSumOfProducts.should == 0; 2868 v.count.should == 1; 2869 } 2870 2871 // withAsSlice test 2872 version(mir_stat_test) 2873 @safe pure nothrow @nogc 2874 unittest 2875 { 2876 import mir.rc.array: RCArray; 2877 import mir.test: shouldApprox; 2878 2879 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 2880 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 2881 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 2882 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 2883 2884 auto x = RCArray!double(12); 2885 foreach(i, ref e; x) 2886 e = a[i]; 2887 auto y = RCArray!double(12); 2888 foreach(i, ref e; y) 2889 e = b[i]; 2890 2891 auto v = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x, y); 2892 v.covariance(true).shouldApprox == -5.5 / 12; 2893 v.covariance(false).shouldApprox == -5.5 / 11; 2894 v.correlation.shouldApprox == -0.0623684; 2895 } 2896 2897 // Test input range 2898 version(mir_stat_test) 2899 @safe pure nothrow 2900 unittest 2901 { 2902 import mir.math.sum: Summation; 2903 import mir.test: should, shouldApprox; 2904 import std.range: iota; 2905 import std.algorithm: map; 2906 2907 auto x1 = iota(0, 5); 2908 auto y1 = iota(-3, 2); 2909 auto v1 = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x1, y1); 2910 v1.covariance(true).should == 10.0 / 5; 2911 v1.correlation.shouldApprox == 1; 2912 2913 // this version can't use elementCount 2914 auto x2 = x1.map!(a => 2 * a); 2915 auto y2 = y1.map!(a => 2 * a); 2916 auto v2 = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x2, y2); 2917 v2.covariance(true).should == 40.0 / 5; 2918 v2.correlation.shouldApprox == 1; 2919 } 2920 2921 /// 2922 struct CorrelationAccumulator(T, CorrelationAlgo correlationAlgo, Summation summation) 2923 if (isMutable!T && correlationAlgo == CorrelationAlgo.assumeZeroMean) 2924 { 2925 import mir.math.sum: elementType, Summator; 2926 import mir.ndslice.slice: Slice, SliceKind, hasAsSlice, isConvertibleToSlice, isSlice; 2927 import mir.primitives: isInputRange, front, empty, popFront; 2928 2929 /// 2930 private size_t _count; 2931 /// 2932 alias S = Summator!(T, summation); 2933 /// 2934 S centeredSummatorOfProducts; 2935 /// 2936 S centeredSummatorOfSquaresLeft; 2937 /// 2938 S centeredSummatorOfSquaresRight; 2939 2940 /// 2941 this(RangeX, RangeY)(RangeX x, RangeY y) 2942 if (isInputRange!RangeX && isInputRange!RangeY) 2943 { 2944 this.put(x, y); 2945 } 2946 2947 /// 2948 this()(T x, T y) 2949 { 2950 this.put(x, y); 2951 } 2952 2953 /// 2954 void put(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 2955 Slice!(IteratorX, 1, kindX) x, 2956 Slice!(IteratorY, 1, kindY) y 2957 ) 2958 in 2959 { 2960 assert(x.length == y.length, 2961 "CorrelationAcumulator.put: both vectors must have the same length"); 2962 } 2963 do 2964 { 2965 import mir.ndslice.topology: zip, map; 2966 2967 _count += x.length; 2968 auto z = x.zip(y).map!("a * b", "a * a", "b * b"); 2969 z.putter3(centeredSummatorOfProducts, 2970 centeredSummatorOfSquaresLeft, 2971 centeredSummatorOfSquaresRight); 2972 } 2973 2974 /// 2975 void put(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 2976 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 2977 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 2978 { 2979 import mir.ndslice.slice: toSlice; 2980 this.put(x.toSlice, y.toSlice); 2981 } 2982 2983 /// 2984 void put(RangeX, RangeY)(RangeX x, RangeY y) 2985 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && is(elementType!RangeX : T) && 2986 isInputRange!RangeY && !isConvertibleToSlice!RangeY && is(elementType!RangeY : T)) 2987 { 2988 do 2989 { 2990 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 2991 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 2992 this.put(x.front, y.front); 2993 x.popFront; 2994 y.popFront; 2995 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 2996 } 2997 2998 /// 2999 void put()(T x, T y) 3000 { 3001 _count++; 3002 centeredSummatorOfProducts.put(x * y); 3003 centeredSummatorOfSquaresLeft.put(x * x); 3004 centeredSummatorOfSquaresRight.put(y * y); 3005 } 3006 3007 /// 3008 void put(U, Summation sumAlgo)(CorrelationAccumulator!(U, correlationAlgo, sumAlgo) v) 3009 { 3010 _count += v.count; 3011 centeredSummatorOfProducts.put(v.centeredSumOfProducts!T); 3012 centeredSummatorOfSquaresLeft.put(v.centeredSumOfSquaresLeft!T); 3013 centeredSummatorOfSquaresRight.put(v.centeredSumOfSquaresRight!T); 3014 } 3015 3016 const: 3017 3018 /// 3019 size_t count() @property 3020 { 3021 return _count; 3022 } 3023 /// 3024 F sumLeft(F = T)() @property 3025 { 3026 return 0; 3027 } 3028 /// 3029 F sumRight(F = T)() @property 3030 { 3031 return 0; 3032 } 3033 /// 3034 F meanLeft(F = T)() @property 3035 { 3036 return 0; 3037 } 3038 /// 3039 F meanRight(F = T)() @property 3040 { 3041 return 0; 3042 } 3043 /// 3044 F centeredSumOfProducts(F = T)() @property 3045 { 3046 return cast(F) centeredSummatorOfProducts.sum; 3047 } 3048 /// 3049 F centeredSumOfSquaresLeft(F = T)() @property 3050 { 3051 return cast(F) centeredSummatorOfSquaresLeft.sum; 3052 } 3053 /// 3054 F centeredSumOfSquaresRight(F = T)() @property 3055 { 3056 return cast(F) centeredSummatorOfSquaresRight.sum; 3057 } 3058 /// 3059 F covariance(F = T)(bool isPopulation) @property 3060 { 3061 return centeredSumOfProducts!F / (count + isPopulation - 1); 3062 } 3063 /// 3064 F correlation(F = T)() @property 3065 in (centeredSumOfSquaresLeft > 0, "`x` must have centered sum of squares larger than zero") 3066 in (centeredSumOfSquaresRight > 0, "`y` must have centered sum of squares larger than zero") 3067 { 3068 import mir.math.common: sqrt; 3069 return centeredSumOfProducts!F / (sqrt(centeredSumOfSquaresLeft!F) * sqrt(centeredSumOfSquaresRight!F)); 3070 } 3071 } 3072 3073 /// 3074 version(mir_stat_test) 3075 @safe pure nothrow 3076 unittest 3077 { 3078 import mir.stat.transform: center; 3079 import mir.math.sum: Summation; 3080 import mir.ndslice.slice: sliced; 3081 import mir.test: shouldApprox; 3082 3083 auto a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3084 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3085 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3086 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3087 auto x = a.center; 3088 auto y = b.center; 3089 3090 CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive) v; 3091 v.put(x, y); 3092 3093 v.covariance(true).shouldApprox == -5.5 / 12; 3094 v.covariance(false).shouldApprox == -5.5 / 11; 3095 3096 v.correlation.shouldApprox == -0.0623684; 3097 3098 v.put(4.0, 3.0); 3099 v.covariance(true).shouldApprox == 6.5 / 13; 3100 v.covariance(false).shouldApprox == 6.5 / 12; 3101 3102 v.correlation.shouldApprox == 0.0628802; 3103 } 3104 3105 // Check dynamic array 3106 version(mir_stat_test) 3107 @safe pure nothrow 3108 unittest 3109 { 3110 import mir.math.sum: Summation; 3111 import mir.ndslice.slice: sliced; 3112 import mir.stat.descriptive.univariate: mean; 3113 import mir.stat.transform: center; 3114 import mir.test: should, shouldApprox; 3115 3116 auto a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3117 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 3118 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3119 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 3120 auto aMean = a.mean; 3121 auto bMean = b.mean; 3122 auto x = a.dup; 3123 auto y = b.dup; 3124 for (size_t i; i < a.length; i++) { 3125 x[i] -= aMean; 3126 y[i] -= bMean; 3127 } 3128 3129 CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive) v; 3130 v.put(x, y); 3131 3132 v.covariance(true).shouldApprox == -5.5 / 12; 3133 v.covariance(false).shouldApprox == -5.5 / 11; 3134 3135 v.correlation.shouldApprox == -0.0623684; 3136 3137 v.put(4.0, 3.0); 3138 v.covariance(true).shouldApprox == 6.5 / 13; 3139 v.covariance(false).shouldApprox == 6.5 / 12; 3140 3141 v.correlation.shouldApprox == 0.0628802; 3142 3143 v.meanLeft.should == 0; 3144 v.meanRight.should == 0; 3145 } 3146 3147 // rcslice test 3148 version(mir_stat_test) 3149 @safe pure nothrow @nogc 3150 unittest 3151 { 3152 import mir.math.sum: Summation; 3153 import mir.ndslice.allocation: mininitRcslice; 3154 import mir.stat.transform: center; 3155 import mir.test: shouldApprox; 3156 3157 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3158 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 3159 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3160 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 3161 auto x = mininitRcslice!double(12); 3162 auto y = mininitRcslice!double(12); 3163 x[] = a; 3164 y[] = b; 3165 auto v = CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive)(x.center, y.center); 3166 3167 v.covariance(true).shouldApprox == -5.5 / 12; 3168 v.covariance(false).shouldApprox == -5.5 / 11; 3169 v.correlation.shouldApprox == -0.0623684; 3170 } 3171 3172 // Check adding CorrelationAccumultors 3173 version(mir_stat_test) 3174 @safe pure nothrow 3175 unittest 3176 { 3177 import mir.math.sum: sum, Summation; 3178 import mir.ndslice.slice: sliced; 3179 import mir.test: shouldApprox; 3180 3181 auto a1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 3182 auto b1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 3183 auto a2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3184 auto b2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3185 auto meanA = (a1.sum + a2.sum) / 12; 3186 auto meanB = (b1.sum + b2.sum) / 12; 3187 auto x1 = a1 - meanA; 3188 auto y1 = b1 - meanB; 3189 auto x2 = a2 - meanA; 3190 auto y2 = b2 - meanB; 3191 3192 CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive) v1; 3193 v1.put(x1, y1); 3194 CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive) v2; 3195 v2.put(x2, y2); 3196 v1.put(v2); 3197 3198 v1.covariance(true).shouldApprox == -5.5 / 12; 3199 v1.covariance(false).shouldApprox == -5.5 / 11; 3200 v1.correlation.shouldApprox == -0.0623684; 3201 } 3202 3203 // Initializing with one point 3204 version(mir_stat_test) 3205 @safe pure nothrow 3206 unittest 3207 { 3208 import mir.math.sum: Summation; 3209 import mir.test: should; 3210 3211 auto v = CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive)(4.0, 3.0); 3212 v.centeredSumOfProducts.should == 12; 3213 v.count.should == 1; 3214 } 3215 3216 // Test input range 3217 version(mir_stat_test) 3218 @safe pure nothrow 3219 unittest 3220 { 3221 import mir.math.sum: Summation; 3222 import mir.test: should; 3223 import std.range: iota; 3224 3225 auto x = iota(0, 5); 3226 auto y = iota(-3, 2); 3227 auto v = CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive)(x, y); 3228 v.centeredSumOfProducts.should == 0; // different from other algorithms because these don't have mean of zero 3229 } 3230 3231 /// 3232 struct CorrelationAccumulator(T, CorrelationAlgo correlationAlgo, Summation summation) 3233 if (isFloatingPoint!T && isMutable!T && correlationAlgo == CorrelationAlgo.hybrid) 3234 { 3235 import mir.functional: naryFun; 3236 import mir.math.sum: elementType, Summator; 3237 import mir.ndslice.slice: isConvertibleToSlice, isSlice, Slice, SliceKind; 3238 import mir.primitives: isInputRange, front, empty, popFront; 3239 3240 /// 3241 private size_t _count; 3242 /// 3243 alias S = Summator!(T, summation); 3244 /// 3245 S summatorLeft; 3246 /// 3247 S summatorRight; 3248 /// 3249 S centeredSummatorOfProducts; 3250 /// 3251 S centeredSummatorOfSquaresLeft; 3252 /// 3253 S centeredSummatorOfSquaresRight; 3254 3255 /// 3256 this()(T x, T y) 3257 { 3258 this.put(x, y); 3259 } 3260 3261 /// 3262 this(IteratorX, IteratorY, SliceKind kindX, SliceKind kindY)( 3263 Slice!(IteratorX, 1, kindX) x, 3264 Slice!(IteratorY, 1, kindY) y 3265 ) 3266 in 3267 { 3268 assert(x.length == y.length, 3269 "CorrelationAcumulator.put: both vectors must have the same length"); 3270 } 3271 do 3272 { 3273 import mir.ndslice.internal: LeftOp; 3274 import mir.ndslice.topology: map, vmap, zip; 3275 3276 _count += x.length; 3277 summatorLeft.put(x.lightScope); 3278 summatorRight.put(y.lightScope); 3279 auto z = x.vmap(LeftOp!("-", T)(meanLeft)).zip(y.vmap(LeftOp!("-", T)(meanRight))).map!("a * b", "a * a", "b * b"); 3280 z.putter3(centeredSummatorOfProducts, 3281 centeredSummatorOfSquaresLeft, 3282 centeredSummatorOfSquaresRight); 3283 } 3284 3285 /// 3286 this(SliceLikeX, SliceLikeY)(SliceLikeX x, SliceLikeY y) 3287 if (isConvertibleToSlice!SliceLikeX && !isSlice!SliceLikeX && 3288 isConvertibleToSlice!SliceLikeY && !isSlice!SliceLikeY) 3289 { 3290 import mir.ndslice.slice: toSlice; 3291 this(x.toSlice, y.toSlice); 3292 } 3293 3294 /// 3295 this(RangeX, RangeY)(RangeX x, RangeY y) 3296 if (isInputRange!RangeX && !isConvertibleToSlice!RangeX && 3297 isInputRange!RangeY && !isConvertibleToSlice!RangeY) 3298 { 3299 static if (is(elementType!RangeX : T) && is(elementType!RangeY : T)) { 3300 import mir.primitives: elementCount, hasShape; 3301 3302 static if (hasShape!RangeX && hasShape!RangeY) { 3303 assert(x.elementCount == y.elementCount); 3304 _count += x.elementCount; 3305 summatorLeft.put(x); 3306 summatorRight.put(y); 3307 } else { 3308 import std.range: zip; 3309 3310 foreach(a, b; zip(x, y)) { 3311 _count++; 3312 summatorLeft.put(a); 3313 summatorRight.put(b); 3314 } 3315 } 3316 3317 T xMean = meanLeft; 3318 T yMean = meanRight; 3319 T xDeMean; 3320 T yDeMean; 3321 do 3322 { 3323 assert(!(!x.empty && y.empty) && !(x.empty && !y.empty), 3324 "x and y must both be empty at the same time, one cannot be empty while the other has remaining items"); 3325 xDeMean = x.front - xMean; 3326 yDeMean = y.front - yMean; 3327 centeredSummatorOfProducts.put(xDeMean * yDeMean); 3328 centeredSummatorOfSquaresLeft.put(xDeMean * xDeMean); 3329 centeredSummatorOfSquaresRight.put(yDeMean * yDeMean); 3330 x.popFront; 3331 y.popFront; 3332 } while(!x.empty || !y.empty); // Using an || instead of && so that the loop does not end early. mis-matched lengths of x and y sould be caught by above assert 3333 } else { 3334 this.put(x, y); 3335 } 3336 } 3337 3338 /// 3339 void put(RangeX, RangeY)(RangeX x, RangeY y) 3340 if (isInputRange!RangeX && isInputRange!RangeY) 3341 { 3342 static if (is(elementType!RangeX : T) && is(elementType!RangeY : T)) { 3343 auto v = typeof(this)(x, y); 3344 this.put(v); 3345 } else { 3346 import std.range: zip; 3347 foreach(a, b; zip(x, y)) { 3348 this.put(a, b); 3349 } 3350 } 3351 } 3352 3353 /// 3354 void put()(T x, T y) 3355 { 3356 T deltaX = x; 3357 T deltaY = y; 3358 if (count > 0) { 3359 deltaX -= meanLeft; 3360 deltaY -= meanRight; 3361 } 3362 _count++; 3363 summatorLeft.put(x); 3364 summatorRight.put(y); 3365 centeredSummatorOfProducts.put(deltaX * (y - meanRight)); 3366 centeredSummatorOfSquaresLeft.put(deltaX * (x - meanLeft)); 3367 centeredSummatorOfSquaresRight.put(deltaY * (y - meanRight)); 3368 } 3369 3370 /// 3371 void put(U, CorrelationAlgo covAlgo, Summation sumAlgo)(CorrelationAccumulator!(U, covAlgo, sumAlgo) v) 3372 { 3373 size_t oldCount = count; 3374 T deltaLeft = v.meanLeft; 3375 T deltaRight = v.meanRight; 3376 if (count > 0) { 3377 deltaLeft -= meanLeft!T; 3378 deltaRight -= meanRight!T; 3379 } 3380 _count += v.count; 3381 summatorLeft.put(v.sumLeft!T); 3382 summatorRight.put(v.sumRight!T); 3383 centeredSummatorOfProducts.put(v.centeredSumOfProducts!T + deltaLeft * deltaRight * v.count * oldCount / count); 3384 centeredSummatorOfSquaresLeft.put(v.centeredSumOfSquaresLeft!T + deltaLeft * deltaLeft * v.count * oldCount / count); 3385 centeredSummatorOfSquaresRight.put(v.centeredSumOfSquaresRight!T + deltaRight * deltaRight * v.count * oldCount / count); 3386 } 3387 3388 const: 3389 3390 /// 3391 size_t count() @property 3392 { 3393 return _count; 3394 } 3395 /// 3396 F sumLeft(F = T)() @property 3397 { 3398 return cast(F) summatorLeft.sum; 3399 } 3400 /// 3401 F sumRight(F = T)() @property 3402 { 3403 return cast(F) summatorRight.sum; 3404 } 3405 /// 3406 F meanLeft(F = T)() @property 3407 { 3408 return sumLeft!F / count; 3409 } 3410 /// 3411 F meanRight(F = T)() @property 3412 { 3413 return sumRight!F / count; 3414 } 3415 /// 3416 F centeredSumOfProducts(F = T)() @property 3417 { 3418 return cast(F) centeredSummatorOfProducts.sum; 3419 } 3420 /// 3421 F centeredSumOfSquaresLeft(F = T)() @property 3422 { 3423 return cast(F) centeredSummatorOfSquaresLeft.sum; 3424 } 3425 /// 3426 F centeredSumOfSquaresRight(F = T)() @property 3427 { 3428 return cast(F) centeredSummatorOfSquaresRight.sum; 3429 } 3430 /// 3431 F covariance(F = T)(bool isPopulation) @property 3432 { 3433 return centeredSumOfProducts!F / (count + isPopulation - 1); 3434 } 3435 /// 3436 F correlation(F = T)() @property 3437 in (centeredSumOfSquaresLeft > 0, "`x` must have centered sum of squares larger than zero") 3438 in (centeredSumOfSquaresRight > 0, "`y` must have centered sum of squares larger than zero") 3439 { 3440 import mir.math.common: sqrt; 3441 return centeredSumOfProducts!F / (sqrt(centeredSumOfSquaresLeft!F) * sqrt(centeredSumOfSquaresRight!F)); 3442 } 3443 } 3444 3445 /// 3446 version(mir_stat_test) 3447 @safe pure nothrow 3448 unittest 3449 { 3450 import mir.math.sum: Summation; 3451 import mir.ndslice.slice: sliced; 3452 import mir.test: shouldApprox; 3453 3454 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3455 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3456 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3457 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3458 3459 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v; 3460 v.put(x, y); 3461 3462 v.covariance(true).shouldApprox == -5.5 / 12; 3463 v.covariance(false).shouldApprox == -5.5 / 11; 3464 3465 v.correlation.shouldApprox == -0.0623684; 3466 3467 v.put(4.0, 3.0); 3468 v.covariance(true).shouldApprox == -5.5 / 13; 3469 v.covariance(false).shouldApprox == -5.5 / 12; 3470 3471 v.correlation.shouldApprox == -0.0611234; 3472 } 3473 3474 // Check dynamic array 3475 version(mir_stat_test) 3476 @safe pure nothrow 3477 unittest 3478 { 3479 import mir.math.sum: Summation; 3480 import mir.ndslice.slice: sliced; 3481 import mir.test: shouldApprox; 3482 3483 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3484 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 3485 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3486 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 3487 3488 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v; 3489 v.put(x, y); 3490 3491 v.covariance(true).shouldApprox == -5.5 / 12; 3492 v.covariance(false).shouldApprox == -5.5 / 11; 3493 3494 v.meanLeft.shouldApprox == 2.4375; 3495 v.meanRight.shouldApprox == 3; 3496 3497 v.correlation.shouldApprox == -0.0623684; 3498 3499 v.put(4.0, 3.0); 3500 v.covariance(true).shouldApprox == -5.5 / 13; 3501 v.covariance(false).shouldApprox == -5.5 / 12; 3502 3503 v.correlation.shouldApprox == -0.0611234; 3504 } 3505 3506 // rcslice test 3507 version(mir_stat_test) 3508 @safe pure nothrow @nogc 3509 unittest 3510 { 3511 import mir.math.sum: Summation; 3512 import mir.ndslice.allocation: mininitRcslice; 3513 import mir.test: shouldApprox; 3514 3515 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3516 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 3517 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3518 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 3519 auto x = mininitRcslice!double(12); 3520 auto y = mininitRcslice!double(12); 3521 x[] = a; 3522 y[] = b; 3523 auto v = CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive)(x, y); 3524 3525 v.covariance(true).shouldApprox == -5.5 / 12; 3526 v.covariance(false).shouldApprox == -5.5 / 11; 3527 3528 v.correlation.shouldApprox == -0.0623684; 3529 } 3530 3531 // Check adding CorrelationAccumultors 3532 version(mir_stat_test) 3533 @safe pure nothrow 3534 unittest 3535 { 3536 import mir.math.sum: sum, Summation; 3537 import mir.ndslice.slice: sliced; 3538 import mir.test: shouldApprox; 3539 3540 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 3541 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 3542 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3543 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3544 3545 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v1; 3546 v1.put(x1, y1); 3547 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v2; 3548 v2.put(x2, y2); 3549 v1.put(v2); 3550 3551 v1.covariance(true).shouldApprox == -5.5 / 12; 3552 v1.covariance(false).shouldApprox == -5.5 / 11; 3553 3554 v1.correlation.shouldApprox == -0.0623684; 3555 } 3556 3557 // Check adding CorrelationAccumultors (naive) 3558 version(mir_stat_test) 3559 @safe pure nothrow 3560 unittest 3561 { 3562 import mir.math.sum: sum, Summation; 3563 import mir.ndslice.slice: sliced; 3564 import mir.test: shouldApprox; 3565 3566 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 3567 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 3568 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3569 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3570 3571 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v1; 3572 v1.put(x1, y1); 3573 CorrelationAccumulator!(double, CorrelationAlgo.naive, Summation.naive) v2; 3574 v2.put(x2, y2); 3575 v1.put(v2); 3576 3577 v1.covariance(true).shouldApprox == -5.5 / 12; 3578 v1.covariance(false).shouldApprox == -5.5 / 11; 3579 3580 v1.correlation.shouldApprox == -0.0623684; 3581 } 3582 3583 // Check adding CorrelationAccumultors (twoPass) 3584 version(mir_stat_test) 3585 @safe pure nothrow 3586 unittest 3587 { 3588 import mir.math.sum: sum, Summation; 3589 import mir.ndslice.slice: sliced; 3590 import mir.test: shouldApprox; 3591 3592 auto x1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 3593 auto y1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 3594 auto x2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3595 auto y2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3596 3597 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v1; 3598 v1.put(x1, y1); 3599 auto v2 = CorrelationAccumulator!(double, CorrelationAlgo.twoPass, Summation.naive)(x2, y2); 3600 v1.put(v2); 3601 3602 v1.covariance(true).shouldApprox == -5.5 / 12; 3603 v1.covariance(false).shouldApprox == -5.5 / 11; 3604 3605 v1.correlation.shouldApprox == -0.0623684; 3606 } 3607 3608 // Check adding CorrelationAccumultors (assumeZeroMean) 3609 version(mir_stat_test) 3610 @safe pure nothrow 3611 unittest 3612 { 3613 import mir.math.sum: sum, Summation; 3614 import mir.ndslice.slice: sliced; 3615 import mir.stat.transform: center; 3616 import mir.test: shouldApprox; 3617 3618 auto a1 = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25].sliced; 3619 auto b1 = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5].sliced; 3620 auto a2 = [ 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3621 auto b2 = [ 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3622 auto x1 = a1.center; 3623 auto y1 = b1.center; 3624 auto x2 = a2.center; 3625 auto y2 = b2.center; 3626 3627 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v1; 3628 v1.put(x1, y1); 3629 auto v2 = CorrelationAccumulator!(double, CorrelationAlgo.assumeZeroMean, Summation.naive)(x2, y2); 3630 v1.put(v2); 3631 3632 v1.covariance(true).shouldApprox == -1.9375 / 12; //note: different from above due to inconsistent centering 3633 v1.covariance(false).shouldApprox == -1.9375 / 11; //note: different from above due to inconsistent centering 3634 3635 v1.correlation.shouldApprox == -0.0229089; //note: different from above due to inconsistent centering 3636 } 3637 3638 // Initializing with one point 3639 version(mir_stat_test) 3640 @safe pure nothrow 3641 unittest 3642 { 3643 import mir.math.sum: Summation; 3644 import mir.test: should; 3645 3646 auto v = CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive)(4.0, 3.0); 3647 v.covariance(true).should == 0; 3648 } 3649 3650 // Test input range 3651 version(mir_stat_test) 3652 @safe pure nothrow 3653 unittest 3654 { 3655 import mir.math.sum: Summation; 3656 import mir.test: should, shouldApprox; 3657 import std.algorithm: map; 3658 import std.range: chunks, iota; 3659 3660 auto x1 = iota(0, 5); 3661 auto y1 = iota(-3, 2); 3662 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v1; 3663 v1.put(x1, y1); 3664 v1.covariance(true).should == 10.0 / 5; 3665 v1.correlation.shouldApprox == 1; 3666 // this version can't use elementCount 3667 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v2; 3668 auto x2 = x1.map!(a => 2 * a); 3669 auto y2 = y1.map!(a => 2 * a); 3670 v2.put(x2, y2); 3671 v2.covariance(true).should == 40.0 / 5; 3672 v2.correlation.shouldApprox == 1; 3673 CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive) v3; 3674 v3.put(x1.chunks(1), y1.chunks(1)); 3675 v3.covariance(true).should == 10.0 / 5; 3676 v3.correlation.shouldApprox == 1; 3677 auto v4 = CorrelationAccumulator!(double, CorrelationAlgo.hybrid, Summation.naive)(x1.chunks(1), y1.chunks(1)); 3678 v4.covariance(true).should == 10.0 / 5; 3679 v4.correlation.shouldApprox == 1; 3680 } 3681 3682 /++ 3683 Calculates the correlation of the inputs. 3684 3685 If `x` and `y` are both slices or convertible to slices, then they must be 3686 one-dimensional. 3687 3688 Params: 3689 F = controls type of output 3690 correlationAlgo = algorithm for calculating correlation (default: CorrelationAlgo.hybrid) 3691 summation = algorithm for calculating sums (default: Summation.appropriate) 3692 Returns: 3693 The correlation of the inputs 3694 +/ 3695 template correlation(F, 3696 CorrelationAlgo correlationAlgo = CorrelationAlgo.hybrid, 3697 Summation summation = Summation.appropriate) 3698 if (isFloatingPoint!F) 3699 { 3700 import mir.math.common: fmamath; 3701 import mir.primitives: isInputRange; 3702 import mir.math.sum: ResolveSummationType; 3703 /++ 3704 Params: 3705 x = range, must be finite iterable 3706 y = range, must be finite iterable 3707 +/ 3708 @fmamath F correlation(RangeX, RangeY)(RangeX x, RangeY y) 3709 if (isInputRange!RangeX && isInputRange!RangeY) 3710 { 3711 import core.lifetime: move; 3712 3713 auto correlationAccumulator = CorrelationAccumulator!(F, correlationAlgo, ResolveSummationType!(summation, RangeX, F))(x.move, y.move); 3714 return correlationAccumulator.correlation(); 3715 } 3716 } 3717 3718 /// ditto 3719 template correlation( 3720 CorrelationAlgo correlationAlgo = CorrelationAlgo.hybrid, 3721 Summation summation = Summation.appropriate) 3722 { 3723 import mir.math.common: fmamath; 3724 import mir.primitives: isInputRange; 3725 import mir.stat.descriptive.univariate: stdevType; 3726 import std.traits: CommonType; 3727 /++ 3728 Params: 3729 x = range, must be finite iterable 3730 y = range, must be finite iterable 3731 +/ 3732 @fmamath CommonType!(stdevType!RangeX, stdevType!RangeY) correlation(RangeX, RangeY)(RangeX x, RangeY y) 3733 if (isInputRange!RangeX && isInputRange!RangeY) 3734 { 3735 import core.lifetime: move; 3736 3737 alias F = typeof(return); 3738 return .correlation!(F, correlationAlgo, summation)(x.move, y.move); 3739 } 3740 } 3741 3742 /// ditto 3743 template correlation(F, string correlationAlgo, string summation = "appropriate") 3744 { 3745 mixin("alias correlation = .correlation!(F, CorrelationAlgo." ~ correlationAlgo ~ ", Summation." ~ summation ~ ");"); 3746 } 3747 3748 /// ditto 3749 template correlation(string correlationAlgo, string summation = "appropriate") 3750 { 3751 mixin("alias correlation = .correlation!(CorrelationAlgo." ~ correlationAlgo ~ ", Summation." ~ summation ~ ");"); 3752 } 3753 3754 /// Correlation of vectors 3755 version(mir_stat_test) 3756 @safe pure nothrow 3757 unittest 3758 { 3759 import mir.ndslice.slice: sliced; 3760 import mir.test: shouldApprox; 3761 3762 auto x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3763 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3764 auto y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3765 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3766 3767 x.correlation(y).shouldApprox == -0.0623684; 3768 } 3769 3770 /// Can also set algorithm type 3771 version(mir_stat_test) 3772 @safe pure nothrow 3773 unittest 3774 { 3775 import mir.math.common: approxEqual; 3776 import mir.ndslice.slice: sliced; 3777 import mir.test: shouldApprox; 3778 3779 auto a = [0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3780 2.0, 7.5, 5.0, 1.0, 1.5, 0.0].sliced; 3781 auto b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3782 9.25, -0.75, 2.5, 1.25, -1, 2.25].sliced; 3783 3784 auto x = a + 10.0 ^^ 9; 3785 auto y = b + 10.0 ^^ 9; 3786 3787 x.correlation(y).shouldApprox == -0.0623684; 3788 3789 // The naive algorithm is numerically unstable in this case 3790 //assert(!x.correlation!"naive"(y).approxEqual(-0.0623684)); 3791 3792 // The two-pass algorithm provides the same answer as hybrid 3793 x.correlation!"twoPass"(y).shouldApprox == -0.0623684; 3794 3795 // And the assumeZeroMean algorithm is way off 3796 assert(!x.correlation!"assumeZeroMean"(y).approxEqual(-0.0623684)); 3797 } 3798 3799 /// Can also set algorithm or output type 3800 version(mir_stat_test) 3801 @safe pure nothrow 3802 unittest 3803 { 3804 import mir.ndslice.slice: sliced; 3805 import mir.ndslice.topology: repeat; 3806 import mir.test: shouldApprox; 3807 3808 //Set population covariance, covariance algorithm, sum algorithm or output type 3809 3810 auto a = [1.0, 1e100, 1, -1e100].sliced; 3811 auto b = [1.0e100, 1, 1, -1e100].sliced; 3812 auto x = a * 10_000; 3813 auto y = b * 10_000; 3814 3815 /++ 3816 Due to Floating Point precision, when centering `x`, subtracting the mean 3817 from the second and fourth numbers has no effect (for `y` the same is true 3818 for the first and fourth). Further, after centering and multiplying `x` and 3819 `y`, the third numbers in the slice has precision too low to be included in 3820 the centered sum of the products. For the calculations below, the "true" 3821 correlation should be a tiny amount above 0.5, but it is as if the 3822 calculation happens between [0, 1, 0, -1] and [1, 0, 0, -1]. 3823 +/ 3824 x.correlation(y).shouldApprox == 0.5; 3825 3826 x.correlation!("online")(y).shouldApprox == 0.5; 3827 x.correlation!("online", "kbn")(y).shouldApprox == 0.5; 3828 x.correlation!("online", "kb2")(y).shouldApprox == 0.5; 3829 x.correlation!("online", "precise")(y).shouldApprox == 0.5; 3830 x.correlation!(double, "online", "precise")(y).shouldApprox == 0.5; 3831 3832 auto z1 = [uint.max - 2, uint.max - 1, uint.max].sliced; 3833 auto z2 = [uint.max - 3, uint.max - 2, uint.max - 1].sliced; 3834 z1.correlation(z2).shouldApprox == 1.0; 3835 static assert(is(typeof(z1.correlation!float(z2)) == float)); 3836 } 3837 3838 /++ 3839 For integral slices, pass output type as template parameter to ensure output 3840 type is correct. 3841 +/ 3842 version(mir_stat_test) 3843 @safe pure nothrow 3844 unittest 3845 { 3846 import mir.ndslice.slice: sliced; 3847 import mir.test: shouldApprox; 3848 3849 auto x = [0, 1, 1, 2, 4, 4, 3850 2, 7, 5, 1, 2, 0].sliced; 3851 auto y = [6, 3, 7, 1, 1, 1, 3852 9, 5, 3, 1, 3, 7].sliced; 3853 3854 x.correlation(y).shouldApprox == -0.27934577; 3855 static assert(is(typeof(x.correlation(y)) == double)); 3856 3857 x.correlation!float(y).shouldApprox == -0.27934577; 3858 static assert(is(typeof(x.correlation!float(y)) == float)); 3859 } 3860 3861 // make sure works with dynamic array 3862 version(mir_stat_test) 3863 @safe pure nothrow 3864 unittest 3865 { 3866 import mir.test: shouldApprox; 3867 3868 double[] x = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3869 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 3870 double[] y = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3871 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 3872 x.correlation(y).shouldApprox == -0.0623684; 3873 } 3874 3875 /// Works with @nogc 3876 version(mir_stat_test) 3877 @safe pure nothrow @nogc 3878 unittest 3879 { 3880 import mir.ndslice.allocation: mininitRcslice; 3881 import mir.test: shouldApprox; 3882 3883 static immutable a = [ 0.0, 1.0, 1.5, 2.0, 3.5, 4.25, 3884 2.0, 7.5, 5.0, 1.0, 1.5, 0.0]; 3885 static immutable b = [-0.75, 6.0, -0.25, 8.25, 5.75, 3.5, 3886 9.25, -0.75, 2.5, 1.25, -1, 2.25]; 3887 auto x = mininitRcslice!double(12); 3888 auto y = mininitRcslice!double(12); 3889 x[] = a; 3890 y[] = b; 3891 3892 x.correlation(y).shouldApprox == -0.0623684; 3893 } 3894 3895 // compile with dub test --build=unittest-perf --config=unittest-perf --compiler=ldc2 3896 version(mir_stat_test_cor_performance) 3897 unittest 3898 { 3899 import mir.math.sum: Summation; 3900 import mir.math.internal.benchmark; 3901 import std.stdio: writeln; 3902 import std.traits: EnumMembers; 3903 3904 template staticMap(alias fun, alias S, args...) 3905 { 3906 import std.meta: AliasSeq; 3907 alias staticMap = AliasSeq!(); 3908 static foreach (arg; args) 3909 staticMap = AliasSeq!(staticMap, fun!(double, arg, S)); 3910 } 3911 3912 size_t n = 10_000; 3913 size_t m = 1_000; 3914 3915 alias S = Summation.fast; 3916 alias E = EnumMembers!CorrelationAlgo; 3917 alias fs = staticMap!(correlation, S, E); 3918 double[fs.length] output; 3919 3920 auto e = [E]; 3921 auto time = benchmarkRandom2!(fs)(n, m, output); 3922 writeln("Correlation performance test"); 3923 foreach (size_t i; 0 .. fs.length) { 3924 writeln("Function ", i + 1, ", Algo: ", e[i], ", Output: ", output[i], ", Elapsed time: ", time[i]); 3925 } 3926 writeln(); 3927 }