1 /++ 2 $(H1 Nonlinear Least Squares Solver) 3 4 Copyright: Copyright © 2018, Symmetry Investments & Kaleidic Associates Advisory Limited 5 Authors: Ilya Yaroshenko 6 7 Macros: 8 NDSLICE = $(REF_ALTTEXT $(TT $2), $2, mir, ndslice, $1)$(NBSP) 9 T2=$(TR $(TDNW $(LREF $1)) $(TD $+)) 10 +/ 11 module mir.optim.least_squares; 12 13 import mir.ndslice.slice: Slice, SliceKind, Contiguous, sliced; 14 import std.meta; 15 import std.traits; 16 import lapack: lapackint; 17 18 /++ 19 +/ 20 enum LeastSquaresStatus 21 { 22 /// Maximum number of iterations reached 23 maxIterations = -1, 24 /// The algorithm cann't improve the solution 25 furtherImprovement, 26 /// Stationary values 27 xConverged, 28 /// Stationary gradient 29 gConverged, 30 /// Good (small) residual 31 fConverged, 32 /// 33 badBounds = -32, 34 /// 35 badGuess, 36 /// 37 badMinStepQuality, 38 /// 39 badGoodStepQuality, 40 /// 41 badStepQuality, 42 /// 43 badLambdaParams, 44 /// 45 numericError, 46 } 47 48 version(D_Exceptions) 49 { 50 /+ 51 Exception for $(LREF optimize). 52 +/ 53 private static immutable leastSquaresException_maxIterations = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.maxIterations.leastSquaresStatusString); 54 private static immutable leastSquaresException_badBounds = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badBounds.leastSquaresStatusString); 55 private static immutable leastSquaresException_badGuess = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badGuess.leastSquaresStatusString); 56 private static immutable leastSquaresException_badMinStepQuality = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badMinStepQuality.leastSquaresStatusString); 57 private static immutable leastSquaresException_badGoodStepQuality = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badGoodStepQuality.leastSquaresStatusString); 58 private static immutable leastSquaresException_badStepQuality = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badStepQuality.leastSquaresStatusString); 59 private static immutable leastSquaresException_badLambdaParams = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badLambdaParams.leastSquaresStatusString); 60 private static immutable leastSquaresException_numericError = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.numericError.leastSquaresStatusString); 61 private static immutable leastSquaresExceptions = [ 62 leastSquaresException_badBounds, 63 leastSquaresException_badGuess, 64 leastSquaresException_badMinStepQuality, 65 leastSquaresException_badGoodStepQuality, 66 leastSquaresException_badStepQuality, 67 leastSquaresException_badLambdaParams, 68 leastSquaresException_numericError, 69 ]; 70 } 71 72 /// Delegates for low level D API. 73 alias LeastSquaresFunction(T) = void delegate(Slice!(const(T)*) x, Slice!(T*) y) @safe nothrow @nogc pure; 74 /// ditto 75 alias LeastSquaresJacobian(T) = void delegate(Slice!(const(T)*) x, Slice!(T*, 2) J) @safe nothrow @nogc pure; 76 77 /// Delegates for low level C API. 78 alias LeastSquaresFunctionBetterC(T) = extern(C) void function(scope void* context, size_t m, size_t n, const(T)* x, T* y) @system nothrow @nogc pure; 79 /// 80 alias LeastSquaresJacobianBetterC(T) = extern(C) void function(scope void* context, size_t m, size_t n, const(T)* x, T* J) @system nothrow @nogc pure; 81 82 /++ 83 Least-Squares iteration settings. 84 +/ 85 struct LeastSquaresSettings(T) 86 if (is(T == double) || is(T == float)) 87 { 88 import mir.optim.boxcqp; 89 import mir.math.common: sqrt; 90 import mir.math.constant: GoldenRatio; 91 import lapack: lapackint; 92 93 /// Maximum number of iterations 94 uint maxIterations = 1000; 95 /// Maximum jacobian model age (0 for default selection) 96 uint maxAge; 97 /// epsilon for finite difference Jacobian approximation 98 T jacobianEpsilon = T(2) ^^ ((1 - T.mant_dig) / 2); 99 /// Absolute tolerance for step size, L2 norm 100 T absTolerance = T.epsilon; 101 /// Relative tolerance for step size, L2 norm 102 T relTolerance = 0; 103 /// Absolute tolerance for gradient, L-inf norm 104 T gradTolerance = T.epsilon; 105 /// The algorithm stops iteration when the residual value is less or equal to `maxGoodResidual`. 106 T maxGoodResidual = T.epsilon ^^ 2; 107 /// maximum norm of iteration step 108 T maxStep = T.max.sqrt / 16; 109 /// minimum trust region radius 110 T maxLambda = T.max / 16; 111 /// maximum trust region radius 112 T minLambda = T.min_normal * 16; 113 /// for steps below this quality, the trust region is shrinked 114 T minStepQuality = 0.1; 115 /// for steps above this quality, the trust region is expanded 116 T goodStepQuality = 0.5; 117 /// `lambda` is multiplied by this factor after step below min quality 118 T lambdaIncrease = 2; 119 /// `lambda` is multiplied by this factor after good quality steps 120 T lambdaDecrease = 1 / (GoldenRatio * 2); 121 /// Bound constrained convex quadratic problem settings 122 BoxQPSettings!T qpSettings; 123 } 124 125 /++ 126 Least-Squares results. 127 +/ 128 struct LeastSquaresResult(T) 129 if (is(T == double) || is(T == float)) 130 { 131 /// Computation status 132 LeastSquaresStatus status = LeastSquaresStatus.numericError; 133 /// Successful step count 134 uint iterations; 135 /// Number of the function calls 136 uint fCalls; 137 /// Number of the Jacobian calls 138 uint gCalls; 139 /// Final residual 140 T residual = T.infinity; 141 /// LMA variable for (inverse of) initial trust region radius 142 T lambda = 0; 143 } 144 145 /++ 146 High level D API for Levenberg-Marquardt Algorithm. 147 148 Computes the argmin over x of `sum_i(f(x_i)^2)` using the Levenberg-Marquardt 149 algorithm, and an estimate of the Jacobian of `f` at x. 150 151 The function `f` should take an input vector of length `n`, and fill an output 152 vector of length `m`. 153 154 The function `g` is the Jacobian of `f`, and should fill a row-major `m x n` matrix. 155 156 Throws: $(LREF LeastSquaresException) 157 Params: 158 f = `n -> m` function 159 g = `m × n` Jacobian (optional) 160 tm = thread manager for finite difference jacobian approximation in case of g is null (optional) 161 settings = Levenberg-Marquardt data structure 162 taskPool = task Pool with `.parallel` method for finite difference jacobian approximation in case of g is null (optional) 163 See_also: $(LREF optimizeLeastSquares) 164 +/ 165 LeastSquaresResult!T optimize(alias f, alias g = null, alias tm = null, T)( 166 scope const ref LeastSquaresSettings!T settings, 167 size_t m, 168 Slice!(T*) x, 169 Slice!(const(T)*) l, 170 Slice!(const(T)*) u, 171 ) 172 if ((is(T == float) || is(T == double))) 173 { 174 auto ret = optimizeLeastSquares!(f, g, tm, T)(settings, m, x, l, u); 175 if (ret.status == -1) 176 throw leastSquaresException_maxIterations; 177 else 178 if (ret.status < -1) 179 throw leastSquaresExceptions[ret.status + 32]; 180 return ret; 181 } 182 183 /// ditto 184 LeastSquaresResult!T optimize(alias f, TaskPool, T)( 185 scope const ref LeastSquaresSettings!T settings, 186 size_t m, 187 Slice!(T*) x, 188 Slice!(const(T)*) l, 189 Slice!(const(T)*) u, 190 TaskPool taskPool) 191 if (is(T == float) || is(T == double)) 192 { 193 auto tm = delegate(uint count, scope LeastSquaresTask task) 194 { 195 version(all) 196 { 197 import mir.ndslice.topology: iota; 198 foreach(i; taskPool.parallel(count.iota!uint)) 199 task(cast(uint)taskPool.size, cast(uint)(taskPool.size <= 1 ? 0 : taskPool.workerIndex - 1), i); 200 } 201 else // for debug 202 { 203 foreach(i; 0 .. count) 204 task(1, 0, i); 205 } 206 }; 207 208 auto ret = optimizeLeastSquares!(f, null, tm, T)(settings, m, x, l, u); 209 if (ret.status == -1) 210 throw leastSquaresException_maxIterations; 211 else 212 if (ret.status < -1) 213 throw leastSquaresExceptions[ret.status + 32]; 214 return ret; 215 } 216 217 /// With Jacobian 218 version(mir_optim_test) 219 @safe unittest 220 { 221 import mir.ndslice.allocation: slice; 222 import mir.ndslice.slice: sliced; 223 import mir.blas: nrm2; 224 225 LeastSquaresSettings!double settings; 226 auto x = [100.0, 100].sliced; 227 auto l = x.shape.slice(-double.infinity); 228 auto u = x.shape.slice(+double.infinity); 229 optimize!( 230 (x, y) 231 { 232 y[0] = x[0]; 233 y[1] = 2 - x[1]; 234 }, 235 (x, J) 236 { 237 J[0, 0] = 1; 238 J[0, 1] = 0; 239 J[1, 0] = 0; 240 J[1, 1] = -1; 241 }, 242 )(settings, 2, x, l, u); 243 244 assert(nrm2((x - [0, 2].sliced).slice) < 1e-8); 245 } 246 247 /// Using Jacobian finite difference approximation computed using in multiple threads. 248 version(mir_optim_test) 249 unittest 250 { 251 import mir.ndslice.allocation: slice; 252 import mir.ndslice.slice: sliced; 253 import mir.blas: nrm2; 254 import std.parallelism: taskPool; 255 256 LeastSquaresSettings!double settings; 257 auto x = [-1.2, 1].sliced; 258 auto l = x.shape.slice(-double.infinity); 259 auto u = x.shape.slice(+double.infinity); 260 settings.optimize!( 261 (x, y) // Rosenbrock function 262 { 263 y[0] = 10 * (x[1] - x[0]^^2); 264 y[1] = 1 - x[0]; 265 }, 266 )(2, x, l, u, taskPool); 267 268 // import std.stdio; 269 // writeln(settings); 270 // writeln(x); 271 272 assert(nrm2((x - [1, 1].sliced).slice) < 1e-6); 273 } 274 275 /// Rosenbrock 276 version(mir_optim_test) 277 @safe unittest 278 { 279 import mir.algorithm.iteration: all; 280 import mir.ndslice.allocation: slice; 281 import mir.ndslice.slice: Slice, sliced; 282 import mir.blas: nrm2; 283 284 LeastSquaresSettings!double settings; 285 auto x = [-1.2, 1].sliced; 286 auto l = x.shape.slice(-double.infinity); 287 auto u = x.shape.slice(+double.infinity); 288 289 alias rosenbrockRes = (x, y) 290 { 291 y[0] = 10 * (x[1] - x[0]^^2); 292 y[1] = 1 - x[0]; 293 }; 294 295 alias rosenbrockJac = (x, J) 296 { 297 J[0, 0] = -20 * x[0]; 298 J[0, 1] = 10; 299 J[1, 0] = -1; 300 J[1, 1] = 0; 301 }; 302 303 static class FFF 304 { 305 static auto opCall(Slice!(const(double)*) x, Slice!(double*, 2) J) 306 { 307 rosenbrockJac(x, J); 308 } 309 } 310 311 settings.optimize!(rosenbrockRes, FFF)(2, x, l, u); 312 313 // import std.stdio; 314 315 // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls, " x = ", x); 316 317 assert(nrm2((x - [1, 1].sliced).slice) < 1e-8); 318 319 ///// 320 321 settings = settings.init; 322 x[] = [150.0, 150.0]; 323 l[] = [10.0, 10.0]; 324 u[] = [200.0, 200.0]; 325 326 settings.optimize!(rosenbrockRes, rosenbrockJac)(2, x, l, u); 327 328 // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls, " ", x); 329 assert(nrm2((x - [10, 100].sliced).slice) < 1e-5); 330 assert(x.all!"a >= 10"); 331 } 332 333 /// 334 version(mir_optim_test) 335 @safe unittest 336 { 337 import mir.blas: nrm2; 338 import mir.math.common; 339 import mir.ndslice.allocation: slice; 340 import mir.ndslice.topology: linspace, map; 341 import mir.ndslice.slice: sliced; 342 import mir.random; 343 import mir.random.algorithm; 344 import mir.random.variable; 345 import std.parallelism: taskPool; 346 347 alias model = (x, p) => p[0] * map!exp(-x * p[1]); 348 349 auto p = [1.0, 2.0]; 350 351 auto xdata = [20].linspace([0.0, 10.0]); 352 auto rng = Random(12345); 353 auto ydata = slice(model(xdata, p) + 0.01 * rng.randomSlice(normalVar, xdata.shape)); 354 355 auto x = [0.5, 0.5].sliced; 356 auto l = x.shape.slice(-double.infinity); 357 auto u = x.shape.slice(+double.infinity); 358 359 LeastSquaresSettings!double settings; 360 settings.optimize!((p, y) => y[] = model(xdata, p) - ydata)(ydata.length, x, l, u); 361 362 assert((x - [1.0, 2.0].sliced).slice.nrm2 < 0.05); 363 } 364 365 /// 366 version(mir_optim_test) 367 @safe pure unittest 368 { 369 import mir.algorithm.iteration: all; 370 import mir.ndslice.allocation: slice; 371 import mir.ndslice.topology: map, repeat, iota; 372 import mir.ndslice.slice: sliced; 373 import mir.random; 374 import mir.random.variable; 375 import mir.random.algorithm; 376 import mir.math.common; 377 378 alias model = (x, p) => p[0] * map!exp(-x / p[1]) + p[2]; 379 380 auto xdata = iota([100], 1); 381 auto rng = Random(12345); 382 auto ydata = slice(model(xdata, [10.0, 10.0, 10.0]) + 0.1 * rng.randomSlice(normalVar, xdata.shape)); 383 384 LeastSquaresSettings!double settings; 385 386 auto x = [15.0, 15.0, 15.0].sliced; 387 auto l = [5.0, 11.0, 5.0].sliced; 388 auto u = x.shape.slice(+double.infinity); 389 390 settings.optimize!((p, y) => y[] = model(xdata, p) - ydata) 391 (ydata.length, x, l, u); 392 393 assert(all!"a >= b"(x, l)); 394 395 // import std.stdio; 396 397 // writeln(x); 398 // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls); 399 400 settings = settings.init; 401 x[] = [5.0, 5.0, 5.0]; 402 l[] = -double.infinity; 403 u[] = [15.0, 9.0, 15.0]; 404 settings.optimize!((p, y) => y[] = model(xdata, p) - ydata) 405 (ydata.length, x, l , u); 406 407 assert(x.all!"a <= b"(u)); 408 409 // writeln(x); 410 // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls); 411 } 412 413 /// 414 version(mir_optim_test) 415 @safe pure unittest 416 { 417 import mir.blas: nrm2; 418 import mir.math.common: sqrt; 419 import mir.ndslice.allocation: slice; 420 import mir.ndslice.slice: sliced; 421 422 LeastSquaresSettings!double settings; 423 auto x = [0.001, 0.0001].sliced; 424 auto l = [-0.5, -0.5].sliced; 425 auto u = [0.5, 0.5].sliced; 426 settings.optimize!( 427 (x, y) 428 { 429 y[0] = sqrt(1 - (x[0] ^^ 2 + x[1] ^^ 2)); 430 }, 431 )(1, x, l, u); 432 433 assert(nrm2((x - u).slice) < 1e-8); 434 } 435 436 /++ 437 High level nothtow D API for Levenberg-Marquardt Algorithm. 438 439 Computes the argmin over x of `sum_i(f(x_i)^2)` using the Least-Squares 440 algorithm, and an estimate of the Jacobian of `f` at x. 441 442 The function `f` should take an input vector of length `n`, and fill an output 443 vector of length `m`. 444 445 The function `g` is the Jacobian of `f`, and should fill a row-major `m x n` matrix. 446 447 Returns: optimization status. 448 Params: 449 f = `n -> m` function 450 g = `m × n` Jacobian (optional) 451 tm = thread manager for finite difference jacobian approximation in case of g is null (optional) 452 settings = Levenberg-Marquardt data structure 453 m = length (dimension) of `y = f(x)` 454 x = initial (in) and final (out) X value 455 l = lower X bound 456 u = upper X bound 457 See_also: $(LREF optimize) 458 +/ 459 LeastSquaresResult!T optimizeLeastSquares(alias f, alias g = null, alias tm = null, T)( 460 scope const ref LeastSquaresSettings!T settings, 461 size_t m, 462 Slice!(T*) x, 463 Slice!(const(T)*) l, 464 Slice!(const(T)*) u, 465 ) 466 { 467 scope fInst = delegate(Slice!(const(T)*) x, Slice!(T*) y) 468 { 469 f(x, y); 470 }; 471 if (false) 472 { 473 fInst(x, x); 474 } 475 static if (is(typeof(g) == typeof(null))) 476 enum LeastSquaresJacobian!T gInst = null; 477 else 478 { 479 scope gInst = delegate(Slice!(const(T)*) x, Slice!(T*, 2) J) 480 { 481 g(x, J); 482 }; 483 static if (isNullableFunction!(g)) 484 if (!g) 485 gInst = null; 486 if (false) 487 { 488 Slice!(T*, 2) J; 489 gInst(x, J); 490 } 491 } 492 493 static if (is(typeof(tm) == typeof(null))) 494 enum LeastSquaresThreadManager tmInst = null; 495 else 496 { 497 scope tmInst = delegate( 498 uint count, 499 scope LeastSquaresTask task) 500 { 501 tm(count, task); 502 }; 503 static if (isNullableFunction!(tm)) 504 if (!tm) 505 tmInst = null; 506 if (false) with(settings) 507 tmInst(0, null); 508 } 509 510 auto n = x.length; 511 import mir.ndslice.allocation: rcslice; 512 auto work = rcslice!T(mir_least_squares_work_length(m, n)); 513 auto iwork = rcslice!lapackint(mir_least_squares_iwork_length(m, n)); 514 auto workS = work.lightScope; 515 auto iworkS = iwork.lightScope; 516 return optimizeLeastSquares!T(settings, m, x, l, u, workS, iworkS, fInst.trustedAllAttr, gInst.trustedAllAttr, tmInst.trustedAllAttr); 517 } 518 519 /++ 520 Status string for low (extern) and middle (nothrow) levels D API. 521 Params: 522 st = optimization status 523 Returns: description for $(LeastSquaresStatus)furtherImprovement 524 +/ 525 pragma(inline, false) 526 string leastSquaresStatusString(LeastSquaresStatus st) @safe pure nothrow @nogc 527 { 528 final switch(st) with(LeastSquaresStatus) 529 { 530 case furtherImprovement: 531 return "The algorithm cann't improve the solution"; 532 case maxIterations: 533 return "Maximum number of iterations reached"; 534 case xConverged: 535 return "X converged"; 536 case gConverged: 537 return "Jacobian converged"; 538 case fConverged: 539 return "Residual is small enough"; 540 case badBounds: 541 return "Initial guess must be within bounds."; 542 case badGuess: 543 return "Initial guess must be an array of finite numbers."; 544 case badMinStepQuality: 545 return "0 <= minStepQuality < 1 must hold."; 546 case badGoodStepQuality: 547 return "0 < goodStepQuality <= 1 must hold."; 548 case badStepQuality: 549 return "minStepQuality < goodStepQuality must hold."; 550 case badLambdaParams: 551 return "1 <= lambdaIncrease && lambdaIncrease <= T.max.sqrt and T.min_normal.sqrt <= lambdaDecrease && lambdaDecrease <= 1 must hold."; 552 case numericError: 553 return "Numeric Error"; 554 } 555 } 556 557 /// 558 alias LeastSquaresTask = void delegate( 559 uint totalThreads, 560 uint threadId, 561 uint i) 562 @safe nothrow @nogc pure; 563 564 /// 565 alias LeastSquaresTaskBetterC = extern(C) void function( 566 scope const LeastSquaresTask, 567 uint totalThreads, 568 uint threadId, 569 uint i) 570 @safe nothrow @nogc pure; 571 572 /// Thread manager delegate type for low level `extern(D)` API. 573 alias LeastSquaresThreadManager = void delegate( 574 uint count, 575 scope LeastSquaresTask task) 576 @safe nothrow @nogc pure; 577 578 /++ 579 Low level `extern(D)` instatiation. 580 Params: 581 settings = Levenberg-Marquardt data structure 582 m = length (dimension) of `y = f(x)` 583 x = initial (in) and final (out) X value 584 l = lower X bound 585 u = upper X bound 586 f = `n -> m` function 587 g = `m × n` Jacobian (optional) 588 tm = thread manager for finite difference jacobian approximation in case of g is null (optional) 589 work = floating point workspace length of at least $(LREF mir_least_squares_work_length) 590 iwork = floating point workspace length of at least $(LREF mir_least_squares_iwork_length) 591 +/ 592 pragma(inline, false) 593 LeastSquaresResult!double optimizeLeastSquaresD 594 ( 595 scope const ref LeastSquaresSettings!double settings, 596 size_t m, 597 Slice!(double*) x, 598 Slice!(const(double)*) l, 599 Slice!(const(double)*) u, 600 Slice!(double*) work, 601 Slice!(lapackint*) iwork, 602 scope LeastSquaresFunction!double f, 603 scope LeastSquaresJacobian!double g = null, 604 scope LeastSquaresThreadManager tm = null, 605 ) @trusted nothrow @nogc pure 606 { 607 return optimizeLeastSquaresImplGeneric!double(settings, m, x, l, u, work, iwork, f, g, tm); 608 } 609 610 611 /// ditto 612 pragma(inline, false) 613 LeastSquaresResult!float optimizeLeastSquaresS 614 ( 615 scope const ref LeastSquaresSettings!float settings, 616 size_t m, 617 Slice!(float*) x, 618 Slice!(const(float)*) l, 619 Slice!(const(float)*) u, 620 Slice!(float*) work, 621 Slice!(lapackint*) iwork, 622 scope LeastSquaresFunction!float f, 623 scope LeastSquaresJacobian!float g = null, 624 scope LeastSquaresThreadManager tm = null, 625 ) @trusted nothrow @nogc pure 626 { 627 return optimizeLeastSquaresImplGeneric!float(settings, 2, x, l, u, work, iwork, f, g, tm); 628 } 629 630 /// ditto 631 alias optimizeLeastSquares(T : double) = optimizeLeastSquaresD; 632 /// ditto 633 alias optimizeLeastSquares(T : float) = optimizeLeastSquaresS; 634 635 extern(C) @safe nothrow @nogc 636 { 637 /++ 638 +/ 639 @safe pure nothrow @nogc 640 size_t mir_least_squares_work_length(size_t m, size_t n) 641 { 642 import mir.optim.boxcqp: mir_box_qp_work_length; 643 return mir_box_qp_work_length(n) + n * 5 + n ^^ 2 + n * m + m * 2; 644 } 645 646 /++ 647 +/ 648 @safe pure nothrow @nogc 649 size_t mir_least_squares_iwork_length(size_t m, size_t n) 650 { 651 import mir.utility: max; 652 import mir.optim.boxcqp: mir_box_qp_iwork_length; 653 return max(mir_box_qp_iwork_length(n), n); 654 } 655 656 /++ 657 Status string for extern(C) API. 658 Params: 659 st = optimization status 660 Returns: description for $(LeastSquaresStatus) 661 +/ 662 extern(C) 663 pragma(inline, false) 664 immutable(char)* mir_least_squares_status_string(LeastSquaresStatus st) @trusted pure nothrow @nogc 665 { 666 return st.leastSquaresStatusString.ptr; 667 } 668 669 /// Thread manager function type for low level `extern(C)` API. 670 alias LeastSquaresThreadManagerBetterC = 671 extern(C) void function( 672 scope void* context, 673 uint count, 674 scope const LeastSquaresTask taskContext, 675 scope LeastSquaresTaskBetterC task) 676 @system nothrow @nogc pure; 677 678 /++ 679 Low level `extern(C)` wrapper instatiation. 680 Params: 681 settings = Levenberg-Marquardt data structure 682 fContext = context for the function 683 f = `n -> m` function 684 gContext = context for the Jacobian (optional) 685 g = `m × n` Jacobian (optional) 686 tm = thread manager for finite difference jacobian approximation in case of g is null (optional) 687 m = length (dimension) of `y = f(x)` 688 n = length (dimension) of X 689 x = initial (in) and final (out) X value 690 l = lower X bound 691 u = upper X bound 692 f = `n -> m` function 693 fContext = `f` context 694 g = `m × n` Jacobian (optional) 695 gContext = `g` context 696 tm = thread manager for finite difference jacobian approximation in case of g is null (optional) 697 tmContext = `tm` context 698 work = floating point workspace length of at least $(LREF mir_least_squares_work_length) 699 iwork = floating point workspace length of at least $(LREF mir_least_squares_iwork_length) 700 +/ 701 extern(C) 702 pragma(inline, false) 703 LeastSquaresResult!double mir_optimize_least_squares_d 704 ( 705 scope const ref LeastSquaresSettings!double settings, 706 size_t m, 707 size_t n, 708 double* x, 709 const(double)* l, 710 const(double)* u, 711 Slice!(double*) work, 712 Slice!(lapackint*) iwork, 713 scope void* fContext, 714 scope LeastSquaresFunctionBetterC!double f, 715 scope void* gContext = null, 716 scope LeastSquaresJacobianBetterC!double g = null, 717 scope void* tmContext = null, 718 scope LeastSquaresThreadManagerBetterC tm = null, 719 ) @system nothrow @nogc pure 720 { 721 return optimizeLeastSquaresImplGenericBetterC!double(settings, m, n, x, l, u, work, iwork, fContext, f, gContext, g, tmContext, tm); 722 } 723 724 /// ditto 725 extern(C) 726 pragma(inline, false) 727 LeastSquaresResult!float mir_optimize_least_squares_s 728 ( 729 scope const ref LeastSquaresSettings!float settings, 730 size_t m, 731 size_t n, 732 float* x, 733 const(float)* l, 734 const(float)* u, 735 Slice!(float*) work, 736 Slice!(lapackint*) iwork, 737 scope void* fContext, 738 scope LeastSquaresFunctionBetterC!float f, 739 scope void* gContext = null, 740 scope LeastSquaresJacobianBetterC!float g = null, 741 scope void* tmContext = null, 742 scope LeastSquaresThreadManagerBetterC tm = null, 743 ) @system nothrow @nogc pure 744 { 745 return optimizeLeastSquaresImplGenericBetterC!float(settings, m, n, x, l, u, work, iwork, fContext, f, gContext, g, tmContext, tm); 746 } 747 748 /// ditto 749 alias mir_optimize_least_squares(T : double) = mir_optimize_least_squares_d; 750 751 /// ditto 752 alias mir_optimize_least_squares(T : float) = mir_optimize_least_squares_s; 753 754 /++ 755 Initialize LM data structure with default params for iteration. 756 Params: 757 settings = Levenberg-Marquart data structure 758 +/ 759 void mir_least_squares_init_d(ref LeastSquaresSettings!double settings) pure 760 { 761 settings = settings.init; 762 } 763 764 /// ditto 765 void mir_least_squares_init_s(ref LeastSquaresSettings!float settings) pure 766 { 767 settings = settings.init; 768 } 769 770 /// ditto 771 alias mir_least_squares_init(T : double) = mir_least_squares_init_d; 772 773 /// ditto 774 alias mir_least_squares_init(T : float) = mir_least_squares_init_s; 775 776 /++ 777 Resets all counters and flags, fills `x`, `y`, `upper`, `lower`, vecors with default values. 778 Params: 779 settings = Levenberg-Marquart data structure 780 +/ 781 void mir_least_squares_reset_d(ref LeastSquaresSettings!double settings) pure 782 { 783 settings = settings.init; 784 } 785 786 /// ditto 787 void mir_least_squares_reset_s(ref LeastSquaresSettings!float settings) pure 788 { 789 settings = settings.init; 790 } 791 792 /// ditto 793 alias mir_least_squares_reset(T : double) = mir_least_squares_reset_d; 794 795 /// ditto 796 alias mir_least_squares_reset(T : float) = mir_least_squares_reset_s; 797 } 798 799 private: 800 801 LeastSquaresResult!T optimizeLeastSquaresImplGenericBetterC(T) 802 ( 803 scope const ref LeastSquaresSettings!T settings, 804 size_t m, 805 size_t n, 806 T* x, 807 const(T)* l, 808 const(T)* u, 809 Slice!(T*) work, 810 Slice!(lapackint*) iwork, 811 scope void* fContext, 812 scope LeastSquaresFunctionBetterC!T f, 813 scope void* gContext, 814 scope LeastSquaresJacobianBetterC!T g, 815 scope void* tmContext, 816 scope LeastSquaresThreadManagerBetterC tm, 817 ) @system nothrow @nogc pure 818 { 819 version(LDC) pragma(inline, true); 820 821 if (g) 822 return optimizeLeastSquares!T( 823 settings, 824 m, 825 x[0 .. n].sliced, 826 l[0 .. n].sliced, 827 u[0 .. n].sliced, 828 work, 829 iwork, 830 (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator), 831 (x, J) @trusted => g(gContext, J.length, x.length, x.iterator, J.iterator), 832 null 833 ); 834 835 LeastSquaresTaskBetterC taskFunction = (scope const LeastSquaresTask context, uint totalThreads, uint threadId, uint i) @trusted 836 { 837 context(totalThreads, threadId, i); 838 }; 839 840 if (tm) 841 return optimizeLeastSquares!T( 842 settings, 843 m, 844 x[0 .. n].sliced, 845 l[0 .. n].sliced, 846 u[0 .. n].sliced, 847 work, 848 iwork, 849 (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator), 850 null, 851 (count, scope LeastSquaresTask task) @trusted => tm(tmContext, count, task, taskFunction) 852 ); 853 return optimizeLeastSquares!T( 854 settings, 855 m, 856 x[0 .. n].sliced, 857 l[0 .. n].sliced, 858 u[0 .. n].sliced, 859 work, 860 iwork, 861 (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator), 862 null, 863 null 864 ); 865 } 866 867 // private auto assumePure(T)(T t) 868 // if (isFunctionPointer!T || isDelegate!T) 869 // { 870 // enum attrs = functionAttributes!T | FunctionAttribute.pure_; 871 // return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t; 872 // } 873 874 // LM algorithm 875 LeastSquaresResult!T optimizeLeastSquaresImplGeneric(T) 876 ( 877 scope const ref LeastSquaresSettings!T settings, 878 size_t m, 879 Slice!(T*) x, 880 Slice!(const(T)*) lower, 881 Slice!(const(T)*) upper, 882 Slice!(T*) work, 883 Slice!(lapackint*) iwork, 884 scope LeastSquaresFunction!T f, 885 scope LeastSquaresJacobian!T g, 886 scope LeastSquaresThreadManager tm, 887 ) @trusted nothrow @nogc pure 888 { typeof(return) ret; with(ret) with(settings){ 889 pragma(inline, false); 890 import mir.algorithm.iteration: all; 891 import mir.blas: axpy, gemv, scal, ger, copy, axpy, dot, swap, symv, iamax, syrk, Uplo, nrm2; 892 import mir.math.common; 893 import mir.math.sum: sum; 894 import mir.ndslice.allocation: stdcUninitSlice; 895 import mir.ndslice.dynamic: transposed; 896 import mir.ndslice.slice: sliced; 897 import mir.ndslice.topology: canonical, diagonal; 898 import mir.optim.boxcqp; 899 import mir.utility: max; 900 import mir.algorithm.iteration; 901 import core.stdc.stdio; 902 903 debug 904 { 905 work[] = 0; 906 iwork[] = 0; 907 } 908 909 auto n = cast(uint)x.length; 910 911 auto deltaX = work[0 .. n]; work = work[n .. $]; 912 auto Jy = work[0 .. n]; work = work[n .. $]; 913 auto nBuffer = work[0 .. n]; work = work[n .. $]; 914 915 auto JJ = work[0 .. n ^^ 2].sliced(n, n); work = work[n ^^ 2 .. $]; 916 auto J = work[0 .. m * n].sliced(m, n); work = work[m * n .. $]; 917 918 auto y = work[0 .. m]; work = work[m .. $]; 919 auto mBuffer = work[0 .. m]; work = work[m .. $]; 920 921 auto qpl = work[0 .. n]; work = work[n .. $]; 922 auto qpu = work[0 .. n]; work = work[n .. $]; 923 924 auto qpwork = work; 925 926 version(LDC) pragma(inline, true); 927 928 if (m == 0 || n == 0 || !x.all!"-a.infinity < a && a < a.infinity") 929 { status = LeastSquaresStatus.badGuess; return ret; } 930 if (!allLessOrEqual(lower, x) || !allLessOrEqual(x, upper)) 931 { status = LeastSquaresStatus.badBounds; return ret; } 932 if (!(0 <= minStepQuality && minStepQuality < 1)) 933 { status = LeastSquaresStatus.badMinStepQuality; return ret; } 934 if (!(0 <= goodStepQuality && goodStepQuality <= 1)) 935 { status = LeastSquaresStatus.badGoodStepQuality; return ret; } 936 if (!(minStepQuality < goodStepQuality)) 937 { status = LeastSquaresStatus.badStepQuality; return ret; } 938 if (!(1 <= lambdaIncrease && lambdaIncrease <= T.max.sqrt)) 939 { status = LeastSquaresStatus.badLambdaParams; return ret; } 940 if (!(T.min_normal.sqrt <= lambdaDecrease && lambdaDecrease <= 1)) 941 { status = LeastSquaresStatus.badLambdaParams; return ret; } 942 943 auto maxAge = settings.maxAge ? settings.maxAge : g ? 3 : 2 * n; 944 945 if (!tm) tm = delegate(uint count, scope LeastSquaresTask task) pure @nogc nothrow @trusted 946 { 947 foreach(i; 0 .. count) 948 task(1, 0, i); 949 }; 950 951 f(x, y); 952 ++fCalls; 953 residual = dot(y, y); 954 bool fConverged = residual <= maxGoodResidual; 955 956 957 bool needJacobian = true; 958 uint age = maxAge; 959 960 int badPredictions; 961 962 import core.stdc.stdio; 963 964 lambda = 0; 965 iterations = 0; 966 T deltaX_dot; 967 T mu = 1; 968 enum T suspiciousMu = 16; 969 status = LeastSquaresStatus.maxIterations; 970 do 971 { 972 if (fConverged) 973 { 974 status = LeastSquaresStatus.fConverged; 975 break; 976 } 977 if (!(lambda <= maxLambda)) 978 { 979 status = LeastSquaresStatus.furtherImprovement; 980 break; 981 } 982 if (mu > suspiciousMu && age) 983 { 984 needJacobian = true; 985 age = maxAge; 986 mu = 1; 987 } 988 if (!allLessOrEqual(x, x)) 989 { 990 // cast(void) assumePure(&printf)("\n@@@@\nX != X\n@@@@\n"); 991 status = LeastSquaresStatus.numericError; 992 break; 993 } 994 if (needJacobian) 995 { 996 needJacobian = false; 997 if (age < maxAge) 998 { 999 age++; 1000 auto d = 1 / deltaX_dot; 1001 axpy(-1, y, mBuffer); // -deltaY 1002 gemv(1, J, deltaX, 1, mBuffer); //-(f_new - f_old - J_old*h) 1003 scal(-d, mBuffer); 1004 ger(1, mBuffer, deltaX, J); //J_new = J_old + u*h' 1005 } 1006 else 1007 { 1008 age = 0; 1009 if (g) 1010 { 1011 g(x, J); 1012 gCalls += 1; 1013 } 1014 else 1015 { 1016 iwork[0 .. n] = 0; 1017 tm(n, (uint totalThreads, uint threadId, uint j) 1018 @trusted pure nothrow @nogc 1019 { 1020 auto idx = totalThreads >= n ? j : threadId; 1021 auto p = JJ[idx]; 1022 if (iwork[idx]++ == 0) 1023 copy(x, p); 1024 1025 auto save = p[j]; 1026 auto xmh = save - jacobianEpsilon; 1027 auto xph = save + jacobianEpsilon; 1028 xmh = fmax(xmh, lower[j]); 1029 xph = fmin(xph, upper[j]); 1030 auto Jj = J[0 .. $, j]; 1031 if (auto twh = xph - xmh) 1032 { 1033 p[j] = xph; 1034 f(p, mBuffer); 1035 copy(mBuffer, Jj); 1036 p[j] = xmh; 1037 f(p, mBuffer); 1038 p[j] = save; 1039 axpy(-1, mBuffer, Jj); 1040 scal(1 / twh, Jj); 1041 } 1042 else 1043 { 1044 fill(T(0), Jj); 1045 } 1046 }); 1047 fCalls += iwork[0 .. n].sum; 1048 } 1049 } 1050 gemv(1, J.transposed, y, 0, Jy); 1051 if (!(Jy[Jy.iamax].fabs > gradTolerance)) 1052 { 1053 if (age == 0) 1054 { 1055 status = LeastSquaresStatus.gConverged; 1056 break; 1057 } 1058 age = maxAge; 1059 continue; 1060 } 1061 } 1062 1063 syrk(Uplo.Lower, 1, J.transposed, 0, JJ); 1064 1065 if (!(lambda >= minLambda)) 1066 { 1067 lambda = 0.001 * JJ.diagonal[JJ.diagonal.iamax]; 1068 if (!(lambda >= minLambda)) 1069 lambda = 1; 1070 } 1071 1072 copy(lower, qpl); 1073 axpy(-1, x, qpl); 1074 copy(upper, qpu); 1075 axpy(-1, x, qpu); 1076 copy(JJ.diagonal, nBuffer); 1077 JJ.diagonal[] += lambda; 1078 if (qpSettings.solveBoxQP(JJ.canonical, Jy, qpl, qpu, deltaX, false, qpwork, iwork, false) != BoxQPStatus.solved) 1079 { 1080 // cast(void) assumePure(&printf)("\n@@@@\n error in solveBoxQP\n@@@@\n"); 1081 status = LeastSquaresStatus.numericError; 1082 break; 1083 } 1084 1085 if (!allLessOrEqual(deltaX, deltaX)) 1086 { 1087 // cast(void) assumePure(&printf)("\n@@@@\ndX != dX\n@@@@\n"); 1088 status = LeastSquaresStatus.numericError; 1089 break; 1090 } 1091 1092 copy(nBuffer, JJ.diagonal); 1093 1094 axpy(1, x, deltaX); 1095 axpy(-1, x, deltaX); 1096 1097 auto newDeltaX_dot = dot(deltaX, deltaX); 1098 1099 if (!(newDeltaX_dot.sqrt < maxStep)) 1100 { 1101 lambda *= lambdaIncrease * mu; 1102 mu *= 2; 1103 continue; 1104 } 1105 1106 copy(deltaX, nBuffer); 1107 axpy(1, x, nBuffer); 1108 applyBounds(nBuffer, lower, upper); 1109 1110 ++fCalls; 1111 f(nBuffer, mBuffer); 1112 1113 auto trialResidual = dot(mBuffer, mBuffer); 1114 1115 if (!(trialResidual <= T.infinity)) 1116 { 1117 // cast(void) assumePure(&printf)("\n@@@@\n trialResidual = %e\n@@@@\n", trialResidual); 1118 status = LeastSquaresStatus.numericError; 1119 break; 1120 } 1121 1122 auto improvement = residual - trialResidual; 1123 if (!(improvement > 0)) 1124 { 1125 lambda *= lambdaIncrease * mu; 1126 mu *= 2; 1127 continue; 1128 } 1129 1130 needJacobian = true; 1131 mu = 1; 1132 iterations++; 1133 copy(nBuffer, x); 1134 swap(mBuffer, y); 1135 residual = trialResidual; 1136 fConverged = residual <= maxGoodResidual; 1137 deltaX_dot = newDeltaX_dot; 1138 1139 symv(Uplo.Lower, 1, JJ, deltaX, 2, Jy); // use Jy as temporal storage 1140 auto predictedImprovement = -dot(Jy, deltaX); 1141 1142 if (!(predictedImprovement > 0)) 1143 { 1144 status = LeastSquaresStatus.furtherImprovement; 1145 break; 1146 } 1147 1148 auto rho = predictedImprovement / improvement; 1149 1150 if (rho < minStepQuality) 1151 { 1152 lambda *= lambdaIncrease * mu; 1153 mu *= 2; 1154 } 1155 else 1156 if (rho >= goodStepQuality) 1157 { 1158 lambda = fmax(lambdaDecrease * lambda * mu, minLambda); 1159 } 1160 1161 // fmax(tolX, tolX * x.nrm2)); 1162 if (!(deltaX_dot.sqrt > absTolerance && x.nrm2 > deltaX_dot.sqrt * relTolerance)) 1163 { 1164 if (age == 0) 1165 { 1166 status = LeastSquaresStatus.xConverged; 1167 break; 1168 } 1169 age = maxAge; 1170 continue; 1171 } 1172 } 1173 while (iterations < maxIterations); 1174 } return ret; } 1175 1176 pragma(inline, false) 1177 void fill(T, SliceKind kind)(T value, Slice!(T*, 1, kind) x) 1178 { 1179 x[] = value; 1180 } 1181 1182 pragma(inline, false) 1183 bool allLessOrEqual(T)( 1184 Slice!(const(T)*) a, 1185 Slice!(const(T)*) b, 1186 ) 1187 { 1188 import mir.algorithm.iteration: all; 1189 return all!"a <= b"(a, b); 1190 } 1191 1192 uint normalizeSafety()(uint attrs) 1193 { 1194 if (attrs & FunctionAttribute.system) 1195 attrs &= ~FunctionAttribute.safe; 1196 return attrs; 1197 } 1198 1199 auto trustedAllAttr(T)(scope return T t) @trusted 1200 if (isFunctionPointer!T || isDelegate!T) 1201 { 1202 enum attrs = (functionAttributes!T & ~FunctionAttribute.system) 1203 | FunctionAttribute.pure_ 1204 | FunctionAttribute.safe 1205 | FunctionAttribute.nogc 1206 | FunctionAttribute.nothrow_; 1207 return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t; 1208 } 1209 1210 template isNullableFunction(alias f) 1211 { 1212 enum isNullableFunction = __traits(compiles, { alias F = Unqual!(typeof(f)); auto r = function(ref F e) {e = null;};} ); 1213 }