The OpenD Programming Language

1 /++
2 $(H1 Nonlinear Least Squares Solver)
3 
4 Copyright: Copyright © 2018, Symmetry Investments & Kaleidic Associates Advisory Limited
5 Authors:   Ilya Yaroshenko
6 
7 Macros:
8 NDSLICE = $(REF_ALTTEXT $(TT $2), $2, mir, ndslice, $1)$(NBSP)
9 T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
10 +/
11 module mir.optim.least_squares;
12 
13 import mir.ndslice.slice: Slice, SliceKind, Contiguous, sliced;
14 import std.meta;
15 import std.traits;
16 import lapack: lapackint;
17 
18 /++
19 +/
20 enum LeastSquaresStatus
21 {
22     /// Maximum number of iterations reached
23     maxIterations = -1,
24     /// The algorithm cann't improve the solution
25     furtherImprovement,
26     /// Stationary values
27     xConverged,
28     /// Stationary gradient
29     gConverged,
30     /// Good (small) residual
31     fConverged,
32     ///
33     badBounds = -32,
34     ///
35     badGuess,
36     ///
37     badMinStepQuality,
38     ///
39     badGoodStepQuality,
40     ///
41     badStepQuality,
42     ///
43     badLambdaParams,
44     ///
45     numericError,
46 }
47 
48 version(D_Exceptions)
49 {
50     /+
51     Exception for $(LREF optimize).
52     +/
53     private static immutable leastSquaresException_maxIterations = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.maxIterations.leastSquaresStatusString);
54     private static immutable leastSquaresException_badBounds = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badBounds.leastSquaresStatusString);
55     private static immutable leastSquaresException_badGuess = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badGuess.leastSquaresStatusString);
56     private static immutable leastSquaresException_badMinStepQuality = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badMinStepQuality.leastSquaresStatusString);
57     private static immutable leastSquaresException_badGoodStepQuality = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badGoodStepQuality.leastSquaresStatusString);
58     private static immutable leastSquaresException_badStepQuality = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badStepQuality.leastSquaresStatusString);
59     private static immutable leastSquaresException_badLambdaParams = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.badLambdaParams.leastSquaresStatusString);
60     private static immutable leastSquaresException_numericError = new Exception("mir-optim Least Squares: " ~ LeastSquaresStatus.numericError.leastSquaresStatusString);
61     private static immutable leastSquaresExceptions = [
62         leastSquaresException_badBounds,
63         leastSquaresException_badGuess,
64         leastSquaresException_badMinStepQuality,
65         leastSquaresException_badGoodStepQuality,
66         leastSquaresException_badStepQuality,
67         leastSquaresException_badLambdaParams,
68         leastSquaresException_numericError,
69     ];
70 }
71 
72 /// Delegates for low level D API.
73 alias LeastSquaresFunction(T) = void delegate(Slice!(const(T)*) x, Slice!(T*) y) @safe nothrow @nogc pure;
74 /// ditto
75 alias LeastSquaresJacobian(T) = void delegate(Slice!(const(T)*) x, Slice!(T*, 2) J) @safe nothrow @nogc pure;
76 
77 /// Delegates for low level C API.
78 alias LeastSquaresFunctionBetterC(T) = extern(C) void function(scope void* context, size_t m, size_t n, const(T)* x, T* y) @system nothrow @nogc pure;
79 ///
80 alias LeastSquaresJacobianBetterC(T) = extern(C) void function(scope void* context, size_t m, size_t n, const(T)* x, T* J) @system nothrow @nogc pure;
81 
82 /++
83 Least-Squares iteration settings.
84 +/
85 struct LeastSquaresSettings(T)
86     if (is(T == double) || is(T == float))
87 {
88     import mir.optim.boxcqp;
89     import mir.math.common: sqrt;
90     import mir.math.constant: GoldenRatio;
91     import lapack: lapackint;
92 
93     /// Maximum number of iterations
94     uint maxIterations = 1000;
95     /// Maximum jacobian model age (0 for default selection)
96     uint maxAge;
97     /// epsilon for finite difference Jacobian approximation
98     T jacobianEpsilon = T(2) ^^ ((1 - T.mant_dig) / 2);
99     /// Absolute tolerance for step size, L2 norm
100     T absTolerance = T.epsilon;
101     /// Relative tolerance for step size, L2 norm
102     T relTolerance = 0;
103     /// Absolute tolerance for gradient, L-inf norm
104     T gradTolerance = T.epsilon;
105     /// The algorithm stops iteration when the residual value is less or equal to `maxGoodResidual`.
106     T maxGoodResidual = T.epsilon ^^ 2;
107     /// maximum norm of iteration step
108     T maxStep = T.max.sqrt / 16;
109     /// minimum trust region radius
110     T maxLambda = T.max / 16;
111     /// maximum trust region radius
112     T minLambda = T.min_normal * 16;
113     /// for steps below this quality, the trust region is shrinked
114     T minStepQuality = 0.1;
115     /// for steps above this quality, the trust region is expanded
116     T goodStepQuality = 0.5;
117     /// `lambda` is multiplied by this factor after step below min quality
118     T lambdaIncrease = 2;
119     /// `lambda` is multiplied by this factor after good quality steps
120     T lambdaDecrease = 1 / (GoldenRatio * 2);
121     /// Bound constrained convex quadratic problem settings
122     BoxQPSettings!T qpSettings;
123 }
124 
125 /++
126 Least-Squares results.
127 +/
128 struct LeastSquaresResult(T)
129     if (is(T == double) || is(T == float))
130 {
131     /// Computation status
132     LeastSquaresStatus status = LeastSquaresStatus.numericError;
133     /// Successful step count
134     uint iterations;
135     /// Number of the function calls
136     uint fCalls;
137     /// Number of the Jacobian calls
138     uint gCalls;
139     /// Final residual
140     T residual = T.infinity;
141     /// LMA variable for (inverse of) initial trust region radius
142     T lambda = 0;
143 }
144 
145 /++
146 High level D API for Levenberg-Marquardt Algorithm.
147 
148 Computes the argmin over x of `sum_i(f(x_i)^2)` using the Levenberg-Marquardt
149 algorithm, and an estimate of the Jacobian of `f` at x.
150 
151 The function `f` should take an input vector of length `n`, and fill an output
152 vector of length `m`.
153 
154 The function `g` is the Jacobian of `f`, and should fill a row-major `m x n` matrix. 
155 
156 Throws: $(LREF LeastSquaresException)
157 Params:
158     f = `n -> m` function
159     g = `m × n` Jacobian (optional)
160     tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
161     settings = Levenberg-Marquardt data structure
162     taskPool = task Pool with `.parallel` method for finite difference jacobian approximation in case of g is null (optional)
163 See_also: $(LREF optimizeLeastSquares)
164 +/
165 LeastSquaresResult!T optimize(alias f, alias g = null, alias tm = null, T)(
166     scope const ref LeastSquaresSettings!T settings,
167     size_t m,
168     Slice!(T*) x,
169     Slice!(const(T)*) l,
170     Slice!(const(T)*) u,
171 )
172     if ((is(T == float) || is(T == double)))
173 {
174     auto ret = optimizeLeastSquares!(f, g, tm, T)(settings, m, x, l, u);
175     if (ret.status == -1)
176         throw leastSquaresException_maxIterations;
177     else
178     if (ret.status < -1)
179         throw leastSquaresExceptions[ret.status + 32];
180     return ret;
181 }
182 
183 /// ditto
184 LeastSquaresResult!T optimize(alias f, TaskPool, T)(
185     scope const ref LeastSquaresSettings!T settings,
186     size_t m,
187     Slice!(T*) x,
188     Slice!(const(T)*) l,
189     Slice!(const(T)*) u,
190     TaskPool taskPool)
191     if (is(T == float) || is(T == double))
192 {
193     auto tm = delegate(uint count, scope LeastSquaresTask task)
194     {
195         version(all)
196         {
197             import mir.ndslice.topology: iota;
198             foreach(i; taskPool.parallel(count.iota!uint))
199                 task(cast(uint)taskPool.size, cast(uint)(taskPool.size <= 1 ? 0 : taskPool.workerIndex - 1), i);
200         }
201         else // for debug
202         {
203             foreach(i; 0 .. count)
204                 task(1, 0, i);
205         }
206     };
207 
208     auto ret = optimizeLeastSquares!(f, null, tm, T)(settings, m, x, l, u);
209     if (ret.status == -1)
210         throw leastSquaresException_maxIterations;
211     else
212     if (ret.status < -1)
213         throw leastSquaresExceptions[ret.status + 32];
214     return ret;
215 }
216 
217 /// With Jacobian
218 version(mir_optim_test)
219 @safe unittest
220 {
221     import mir.ndslice.allocation: slice;
222     import mir.ndslice.slice: sliced;
223     import mir.blas: nrm2;
224 
225     LeastSquaresSettings!double settings;
226     auto x = [100.0, 100].sliced;
227     auto l = x.shape.slice(-double.infinity);
228     auto u = x.shape.slice(+double.infinity);
229     optimize!(
230         (x, y)
231         {
232             y[0] = x[0];
233             y[1] = 2 - x[1];
234         },
235         (x, J)
236         {
237             J[0, 0] = 1;
238             J[0, 1] = 0;
239             J[1, 0] = 0;
240             J[1, 1] = -1;
241         },
242     )(settings, 2, x, l, u);
243 
244     assert(nrm2((x - [0, 2].sliced).slice) < 1e-8);
245 }
246 
247 /// Using Jacobian finite difference approximation computed using in multiple threads.
248 version(mir_optim_test)
249 unittest
250 {
251     import mir.ndslice.allocation: slice;
252     import mir.ndslice.slice: sliced;
253     import mir.blas: nrm2;
254     import std.parallelism: taskPool;
255 
256     LeastSquaresSettings!double settings;
257     auto x = [-1.2, 1].sliced;
258     auto l = x.shape.slice(-double.infinity);
259     auto u = x.shape.slice(+double.infinity);
260     settings.optimize!(
261         (x, y) // Rosenbrock function
262         {
263             y[0] = 10 * (x[1] - x[0]^^2);
264             y[1] = 1 - x[0];
265         },
266     )(2, x, l, u, taskPool);
267 
268     // import std.stdio;
269     // writeln(settings);
270     // writeln(x);
271 
272     assert(nrm2((x - [1, 1].sliced).slice) < 1e-6);
273 }
274 
275 /// Rosenbrock
276 version(mir_optim_test)
277 @safe unittest
278 {
279     import mir.algorithm.iteration: all;
280     import mir.ndslice.allocation: slice;
281     import mir.ndslice.slice: Slice, sliced;
282     import mir.blas: nrm2;
283 
284     LeastSquaresSettings!double settings;
285     auto x = [-1.2, 1].sliced;
286     auto l = x.shape.slice(-double.infinity);
287     auto u = x.shape.slice(+double.infinity);
288 
289     alias rosenbrockRes = (x, y)
290     {
291         y[0] = 10 * (x[1] - x[0]^^2);
292         y[1] = 1 - x[0];
293     };
294 
295     alias rosenbrockJac = (x, J)
296     {
297         J[0, 0] = -20 * x[0];
298         J[0, 1] = 10;
299         J[1, 0] = -1;
300         J[1, 1] = 0;
301     };
302 
303     static class FFF
304     {
305         static auto opCall(Slice!(const(double)*) x, Slice!(double*, 2) J)
306         {
307             rosenbrockJac(x, J);
308         }
309     }
310 
311     settings.optimize!(rosenbrockRes, FFF)(2, x, l, u);
312 
313     // import std.stdio;
314 
315     // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls, " x = ", x);
316 
317     assert(nrm2((x - [1, 1].sliced).slice) < 1e-8);
318 
319     /////
320 
321     settings = settings.init;
322     x[] = [150.0, 150.0];
323     l[] = [10.0, 10.0];
324     u[] = [200.0, 200.0];
325 
326     settings.optimize!(rosenbrockRes, rosenbrockJac)(2, x, l, u);
327 
328     // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls, " ", x);
329     assert(nrm2((x - [10, 100].sliced).slice) < 1e-5);
330     assert(x.all!"a >= 10");
331 }
332 
333 ///
334 version(mir_optim_test)
335 @safe unittest
336 {
337     import mir.blas: nrm2;
338     import mir.math.common;
339     import mir.ndslice.allocation: slice;
340     import mir.ndslice.topology: linspace, map;
341     import mir.ndslice.slice: sliced;
342     import mir.random;
343     import mir.random.algorithm;
344     import mir.random.variable;
345     import std.parallelism: taskPool;
346 
347     alias model = (x, p) => p[0] * map!exp(-x * p[1]);
348 
349     auto p = [1.0, 2.0];
350 
351     auto xdata = [20].linspace([0.0, 10.0]);
352     auto rng = Random(12345);
353     auto ydata = slice(model(xdata, p) + 0.01 * rng.randomSlice(normalVar, xdata.shape));
354 
355     auto x = [0.5, 0.5].sliced;
356     auto l = x.shape.slice(-double.infinity);
357     auto u = x.shape.slice(+double.infinity);
358 
359     LeastSquaresSettings!double settings;
360     settings.optimize!((p, y) => y[] = model(xdata, p) - ydata)(ydata.length, x, l, u);
361 
362     assert((x - [1.0, 2.0].sliced).slice.nrm2 < 0.05);
363 }
364 
365 ///
366 version(mir_optim_test)
367 @safe pure unittest
368 {
369     import mir.algorithm.iteration: all;
370     import mir.ndslice.allocation: slice;
371     import mir.ndslice.topology: map, repeat, iota;
372     import mir.ndslice.slice: sliced;
373     import mir.random;
374     import mir.random.variable;
375     import mir.random.algorithm;
376     import mir.math.common;
377 
378     alias model = (x, p) => p[0] * map!exp(-x / p[1]) + p[2];
379 
380     auto xdata = iota([100], 1);
381     auto rng = Random(12345);
382     auto ydata = slice(model(xdata, [10.0, 10.0, 10.0]) + 0.1 * rng.randomSlice(normalVar, xdata.shape));
383 
384     LeastSquaresSettings!double settings;
385 
386     auto x = [15.0, 15.0, 15.0].sliced;
387     auto l = [5.0, 11.0, 5.0].sliced;
388     auto u = x.shape.slice(+double.infinity);
389 
390     settings.optimize!((p, y) => y[] = model(xdata, p) - ydata)
391         (ydata.length, x, l, u);
392 
393     assert(all!"a >= b"(x, l));
394 
395     // import std.stdio;
396 
397     // writeln(x);
398     // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls);
399 
400     settings = settings.init;
401     x[] = [5.0, 5.0, 5.0];
402     l[] = -double.infinity;
403     u[] = [15.0, 9.0, 15.0];
404     settings.optimize!((p, y) => y[] = model(xdata, p) - ydata)
405         (ydata.length, x, l , u);
406 
407     assert(x.all!"a <= b"(u));
408 
409     // writeln(x);
410     // writeln(settings.iterations, " ", settings.fCalls, " ", settings.gCalls);
411 }
412 
413 ///
414 version(mir_optim_test)
415 @safe pure unittest
416 {
417     import mir.blas: nrm2;
418     import mir.math.common: sqrt;
419     import mir.ndslice.allocation: slice;
420     import mir.ndslice.slice: sliced;
421 
422     LeastSquaresSettings!double settings;
423     auto x = [0.001, 0.0001].sliced;
424     auto l = [-0.5, -0.5].sliced;
425     auto u = [0.5, 0.5].sliced;
426     settings.optimize!(
427         (x, y)
428         {
429             y[0] = sqrt(1 - (x[0] ^^ 2 + x[1] ^^ 2));
430         },
431     )(1, x, l, u);
432 
433     assert(nrm2((x - u).slice) < 1e-8);
434 }
435 
436 /++
437 High level nothtow D API for Levenberg-Marquardt Algorithm.
438 
439 Computes the argmin over x of `sum_i(f(x_i)^2)` using the Least-Squares
440 algorithm, and an estimate of the Jacobian of `f` at x.
441 
442 The function `f` should take an input vector of length `n`, and fill an output
443 vector of length `m`.
444 
445 The function `g` is the Jacobian of `f`, and should fill a row-major `m x n` matrix. 
446 
447 Returns: optimization status.
448 Params:
449     f = `n -> m` function
450     g = `m × n` Jacobian (optional)
451     tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
452     settings = Levenberg-Marquardt data structure
453     m = length (dimension) of `y = f(x)`
454     x = initial (in) and final (out) X value
455     l = lower X bound
456     u = upper X bound
457 See_also: $(LREF optimize)
458 +/
459 LeastSquaresResult!T optimizeLeastSquares(alias f, alias g = null, alias tm = null, T)(
460     scope const ref LeastSquaresSettings!T settings,
461     size_t m,
462     Slice!(T*) x,
463     Slice!(const(T)*) l,
464     Slice!(const(T)*) u,
465 )
466 {
467     scope fInst = delegate(Slice!(const(T)*) x, Slice!(T*) y)
468     {
469         f(x, y);
470     };
471     if (false)
472     {
473         fInst(x, x);
474     }
475     static if (is(typeof(g) == typeof(null)))
476         enum LeastSquaresJacobian!T gInst = null;
477     else
478     {
479         scope gInst = delegate(Slice!(const(T)*) x, Slice!(T*, 2) J)
480         {
481             g(x, J);
482         };
483         static if (isNullableFunction!(g))
484             if (!g)
485                 gInst = null;
486         if (false)
487         {
488             Slice!(T*, 2) J;
489             gInst(x, J);
490         }
491     }
492 
493     static if (is(typeof(tm) == typeof(null)))
494         enum LeastSquaresThreadManager tmInst = null;
495     else
496     {
497         scope tmInst = delegate(
498             uint count,
499             scope LeastSquaresTask task)
500         {
501             tm(count, task);
502         };
503         static if (isNullableFunction!(tm))
504             if (!tm)
505                 tmInst = null;
506         if (false) with(settings)
507             tmInst(0, null);
508     }
509 
510     auto n = x.length;
511     import mir.ndslice.allocation: rcslice;
512     auto work = rcslice!T(mir_least_squares_work_length(m, n));
513     auto iwork = rcslice!lapackint(mir_least_squares_iwork_length(m, n));
514     auto workS = work.lightScope;
515     auto iworkS = iwork.lightScope;
516     return optimizeLeastSquares!T(settings, m, x, l, u, workS, iworkS, fInst.trustedAllAttr, gInst.trustedAllAttr, tmInst.trustedAllAttr);
517 }
518 
519 /++
520 Status string for low (extern) and middle (nothrow) levels D API.
521 Params:
522     st = optimization status
523 Returns: description for $(LeastSquaresStatus)furtherImprovement
524 +/
525 pragma(inline, false)
526 string leastSquaresStatusString(LeastSquaresStatus st) @safe pure nothrow @nogc
527 {
528     final switch(st) with(LeastSquaresStatus)
529     {
530         case furtherImprovement:
531             return "The algorithm cann't improve the solution";
532         case maxIterations:
533             return "Maximum number of iterations reached";
534         case xConverged:
535             return "X converged";
536         case gConverged:
537             return "Jacobian converged";
538         case fConverged:
539             return "Residual is small enough";
540         case badBounds:
541             return "Initial guess must be within bounds.";
542         case badGuess:
543             return "Initial guess must be an array of finite numbers.";
544         case badMinStepQuality:
545             return "0 <= minStepQuality < 1 must hold.";
546         case badGoodStepQuality:
547             return "0 < goodStepQuality <= 1 must hold.";
548         case badStepQuality:
549             return "minStepQuality < goodStepQuality must hold.";
550         case badLambdaParams:
551             return "1 <= lambdaIncrease && lambdaIncrease <= T.max.sqrt and T.min_normal.sqrt <= lambdaDecrease && lambdaDecrease <= 1 must hold.";
552         case numericError:
553             return "Numeric Error";
554     }
555 }
556 
557 ///
558 alias LeastSquaresTask = void delegate(
559         uint totalThreads,
560         uint threadId,
561         uint i)
562     @safe nothrow @nogc pure;
563 
564 ///
565 alias LeastSquaresTaskBetterC = extern(C) void function(
566         scope const LeastSquaresTask,
567         uint totalThreads,
568         uint threadId,
569         uint i)
570     @safe nothrow @nogc pure;
571 
572 /// Thread manager delegate type for low level `extern(D)` API.
573 alias LeastSquaresThreadManager = void delegate(
574         uint count,
575         scope LeastSquaresTask task)
576     @safe nothrow @nogc pure;
577 
578 /++
579 Low level `extern(D)` instatiation.
580 Params:
581     settings = Levenberg-Marquardt data structure
582     m = length (dimension) of `y = f(x)`
583     x = initial (in) and final (out) X value
584     l = lower X bound
585     u = upper X bound
586     f = `n -> m` function
587     g = `m × n` Jacobian (optional)
588     tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
589     work = floating point workspace length of at least $(LREF mir_least_squares_work_length)
590     iwork = floating point workspace length of at least $(LREF mir_least_squares_iwork_length)
591 +/
592 pragma(inline, false)
593 LeastSquaresResult!double optimizeLeastSquaresD
594     (
595         scope const ref LeastSquaresSettings!double settings,
596         size_t m,
597         Slice!(double*) x,
598         Slice!(const(double)*) l,
599         Slice!(const(double)*) u,
600         Slice!(double*) work,
601         Slice!(lapackint*) iwork,
602         scope LeastSquaresFunction!double f,
603         scope LeastSquaresJacobian!double g = null,
604         scope LeastSquaresThreadManager tm = null,
605     ) @trusted nothrow @nogc pure
606 {
607     return optimizeLeastSquaresImplGeneric!double(settings, m, x, l, u, work, iwork, f, g, tm);
608 }
609 
610 
611 /// ditto
612 pragma(inline, false)
613 LeastSquaresResult!float optimizeLeastSquaresS
614     (
615         scope const ref LeastSquaresSettings!float settings,
616         size_t m,
617         Slice!(float*) x,
618         Slice!(const(float)*) l,
619         Slice!(const(float)*) u,
620         Slice!(float*) work,
621         Slice!(lapackint*) iwork,
622         scope LeastSquaresFunction!float f,
623         scope LeastSquaresJacobian!float g = null,
624         scope LeastSquaresThreadManager tm = null,
625     ) @trusted nothrow @nogc pure
626 {
627     return optimizeLeastSquaresImplGeneric!float(settings, 2, x, l, u, work, iwork, f, g, tm);
628 }
629 
630 /// ditto
631 alias optimizeLeastSquares(T : double) = optimizeLeastSquaresD;
632 /// ditto
633 alias optimizeLeastSquares(T : float) = optimizeLeastSquaresS;
634 
635 extern(C) @safe nothrow @nogc
636 {
637     /++
638     +/
639     @safe pure nothrow @nogc
640     size_t mir_least_squares_work_length(size_t m, size_t n)
641     {
642         import mir.optim.boxcqp: mir_box_qp_work_length;
643         return mir_box_qp_work_length(n) + n * 5 + n ^^ 2 + n * m + m * 2;
644     }
645 
646     /++
647     +/
648     @safe pure nothrow @nogc
649     size_t mir_least_squares_iwork_length(size_t m, size_t n)
650     {
651         import mir.utility: max;
652         import mir.optim.boxcqp: mir_box_qp_iwork_length;
653         return max(mir_box_qp_iwork_length(n), n);
654     }
655 
656     /++
657     Status string for extern(C) API.
658     Params:
659         st = optimization status
660     Returns: description for $(LeastSquaresStatus)
661     +/
662     extern(C)
663     pragma(inline, false)
664     immutable(char)* mir_least_squares_status_string(LeastSquaresStatus st) @trusted pure nothrow @nogc
665     {
666         return st.leastSquaresStatusString.ptr;
667     }
668 
669     /// Thread manager function type for low level `extern(C)` API.
670     alias LeastSquaresThreadManagerBetterC =
671         extern(C) void function(
672             scope void* context,
673             uint count,
674             scope const LeastSquaresTask taskContext,
675             scope LeastSquaresTaskBetterC task)
676             @system nothrow @nogc pure;
677 
678     /++
679     Low level `extern(C)` wrapper instatiation.
680     Params:
681         settings = Levenberg-Marquardt data structure
682         fContext = context for the function
683         f = `n -> m` function
684         gContext = context for the Jacobian (optional)
685         g = `m × n` Jacobian (optional)
686         tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
687         m = length (dimension) of `y = f(x)`
688         n = length (dimension) of X
689         x = initial (in) and final (out) X value
690         l = lower X bound
691         u = upper X bound
692         f = `n -> m` function
693         fContext = `f` context
694         g = `m × n` Jacobian (optional)
695         gContext = `g` context
696         tm = thread manager for finite difference jacobian approximation in case of g is null (optional)
697         tmContext = `tm` context
698         work = floating point workspace length of at least $(LREF mir_least_squares_work_length)
699         iwork = floating point workspace length of at least $(LREF mir_least_squares_iwork_length)
700     +/
701     extern(C)
702     pragma(inline, false)
703     LeastSquaresResult!double mir_optimize_least_squares_d
704         (
705             scope const ref LeastSquaresSettings!double settings,
706             size_t m,
707             size_t n,
708             double* x,
709             const(double)* l,
710             const(double)* u,
711             Slice!(double*) work,
712             Slice!(lapackint*) iwork,
713             scope void* fContext,
714             scope LeastSquaresFunctionBetterC!double f,
715             scope void* gContext = null,
716             scope LeastSquaresJacobianBetterC!double g = null,
717             scope void* tmContext = null,
718             scope LeastSquaresThreadManagerBetterC tm = null,
719         ) @system nothrow @nogc pure
720     {
721         return optimizeLeastSquaresImplGenericBetterC!double(settings, m, n, x, l, u, work, iwork, fContext, f, gContext, g, tmContext, tm);
722     }
723 
724     /// ditto
725     extern(C)
726     pragma(inline, false)
727     LeastSquaresResult!float mir_optimize_least_squares_s
728         (
729             scope const ref LeastSquaresSettings!float settings,
730             size_t m,
731             size_t n,
732             float* x,
733             const(float)* l,
734             const(float)* u,
735             Slice!(float*) work,
736             Slice!(lapackint*) iwork,
737             scope void* fContext,
738             scope LeastSquaresFunctionBetterC!float f,
739             scope void* gContext = null,
740             scope LeastSquaresJacobianBetterC!float g = null,
741             scope void* tmContext = null,
742             scope LeastSquaresThreadManagerBetterC tm = null,
743         ) @system nothrow @nogc pure
744     {
745         return optimizeLeastSquaresImplGenericBetterC!float(settings, m, n, x, l, u, work, iwork, fContext, f, gContext, g, tmContext, tm);
746     }
747 
748     /// ditto
749     alias mir_optimize_least_squares(T : double) = mir_optimize_least_squares_d;
750 
751     /// ditto
752     alias mir_optimize_least_squares(T : float) = mir_optimize_least_squares_s;
753 
754     /++
755     Initialize LM data structure with default params for iteration.
756     Params:
757         settings = Levenberg-Marquart data structure
758     +/
759     void mir_least_squares_init_d(ref LeastSquaresSettings!double settings) pure
760     {
761         settings = settings.init;
762     }
763 
764     /// ditto
765     void mir_least_squares_init_s(ref LeastSquaresSettings!float settings) pure
766     {
767         settings = settings.init;
768     }
769 
770     /// ditto
771     alias mir_least_squares_init(T : double) = mir_least_squares_init_d;
772 
773     /// ditto
774     alias mir_least_squares_init(T : float) = mir_least_squares_init_s;
775 
776     /++
777     Resets all counters and flags, fills `x`, `y`, `upper`, `lower`, vecors with default values.
778     Params:
779         settings = Levenberg-Marquart data structure
780     +/
781     void mir_least_squares_reset_d(ref LeastSquaresSettings!double settings) pure
782     {
783         settings = settings.init;
784     }
785 
786     /// ditto
787     void mir_least_squares_reset_s(ref LeastSquaresSettings!float settings) pure
788     {
789         settings = settings.init;
790     }
791 
792     /// ditto
793     alias mir_least_squares_reset(T : double) = mir_least_squares_reset_d;
794 
795     /// ditto
796     alias mir_least_squares_reset(T : float) = mir_least_squares_reset_s;
797 }
798 
799 private:
800 
801 LeastSquaresResult!T optimizeLeastSquaresImplGenericBetterC(T)
802     (
803         scope const ref LeastSquaresSettings!T settings,
804         size_t m,
805         size_t n,
806         T* x,
807         const(T)* l,
808         const(T)* u,
809         Slice!(T*) work,
810         Slice!(lapackint*) iwork,
811         scope void* fContext,
812         scope LeastSquaresFunctionBetterC!T f,
813         scope void* gContext,
814         scope LeastSquaresJacobianBetterC!T g,
815         scope void* tmContext,
816         scope LeastSquaresThreadManagerBetterC tm,
817     ) @system nothrow @nogc pure
818 {
819     version(LDC) pragma(inline, true);
820 
821     if (g)
822         return optimizeLeastSquares!T(
823             settings,
824             m,
825             x[0 .. n].sliced,
826             l[0 .. n].sliced,
827             u[0 .. n].sliced,
828             work,
829             iwork,
830             (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator),
831             (x, J) @trusted => g(gContext, J.length, x.length, x.iterator, J.iterator),
832             null
833         );
834 
835     LeastSquaresTaskBetterC taskFunction = (scope const LeastSquaresTask context, uint totalThreads, uint threadId, uint i) @trusted
836     {
837         context(totalThreads, threadId, i);
838     };
839 
840     if (tm)
841         return optimizeLeastSquares!T(
842             settings,
843             m,
844             x[0 .. n].sliced,
845             l[0 .. n].sliced,
846             u[0 .. n].sliced,
847             work,
848             iwork,
849             (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator),
850             null,
851             (count, scope LeastSquaresTask task) @trusted => tm(tmContext, count, task, taskFunction)
852         );
853     return optimizeLeastSquares!T(
854         settings,
855         m,
856         x[0 .. n].sliced,
857         l[0 .. n].sliced,
858         u[0 .. n].sliced,
859         work,
860         iwork,
861         (x, y) @trusted => f(fContext, y.length, x.length, x.iterator, y.iterator),
862         null,
863         null
864     );
865 }
866 
867 // private auto assumePure(T)(T t)
868 // if (isFunctionPointer!T || isDelegate!T)
869 // {
870 //     enum attrs = functionAttributes!T | FunctionAttribute.pure_;
871 //     return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
872 // }
873 
874 // LM algorithm
875 LeastSquaresResult!T optimizeLeastSquaresImplGeneric(T)
876     (
877         scope const ref LeastSquaresSettings!T settings,
878         size_t m,
879         Slice!(T*) x,
880         Slice!(const(T)*) lower,
881         Slice!(const(T)*) upper,
882         Slice!(T*) work,
883         Slice!(lapackint*) iwork,
884         scope LeastSquaresFunction!T f,
885         scope LeastSquaresJacobian!T g,
886         scope LeastSquaresThreadManager tm,
887     ) @trusted nothrow @nogc pure
888 { typeof(return) ret; with(ret) with(settings){
889     pragma(inline, false);
890     import mir.algorithm.iteration: all;
891     import mir.blas: axpy, gemv, scal, ger, copy, axpy, dot, swap, symv, iamax, syrk, Uplo, nrm2;
892     import mir.math.common;
893     import mir.math.sum: sum;
894     import mir.ndslice.allocation: stdcUninitSlice;
895     import mir.ndslice.dynamic: transposed;
896     import mir.ndslice.slice: sliced;
897     import mir.ndslice.topology: canonical, diagonal;
898     import mir.optim.boxcqp;
899     import mir.utility: max;
900     import mir.algorithm.iteration;
901     import core.stdc.stdio;
902 
903     debug
904     {
905         work[] = 0;
906         iwork[] = 0;
907     }
908 
909     auto n = cast(uint)x.length;
910 
911     auto deltaX = work[0 .. n]; work = work[n .. $];
912     auto Jy = work[0 .. n]; work = work[n .. $];
913     auto nBuffer = work[0 .. n]; work = work[n .. $];
914 
915     auto JJ = work[0 .. n ^^ 2].sliced(n, n); work = work[n ^^ 2 .. $];
916     auto J = work[0 .. m * n].sliced(m, n); work = work[m * n .. $];
917 
918     auto y = work[0 .. m]; work = work[m .. $];
919     auto mBuffer = work[0 .. m]; work = work[m .. $];
920 
921     auto qpl = work[0 .. n]; work = work[n .. $];
922     auto qpu = work[0 .. n]; work = work[n .. $];
923 
924     auto qpwork = work;
925 
926     version(LDC) pragma(inline, true);
927 
928     if (m == 0 || n == 0 || !x.all!"-a.infinity < a && a < a.infinity")
929         { status = LeastSquaresStatus.badGuess; return ret; }
930     if (!allLessOrEqual(lower, x) || !allLessOrEqual(x, upper))
931         { status = LeastSquaresStatus.badBounds; return ret; }
932     if (!(0 <= minStepQuality && minStepQuality < 1))
933         { status = LeastSquaresStatus.badMinStepQuality; return ret; }
934     if (!(0 <= goodStepQuality && goodStepQuality <= 1))
935         { status = LeastSquaresStatus.badGoodStepQuality; return ret; }
936     if (!(minStepQuality < goodStepQuality))
937         { status = LeastSquaresStatus.badStepQuality; return ret; }
938     if (!(1 <= lambdaIncrease && lambdaIncrease <= T.max.sqrt))
939         { status = LeastSquaresStatus.badLambdaParams; return ret; }
940     if (!(T.min_normal.sqrt <= lambdaDecrease && lambdaDecrease <= 1))
941         { status = LeastSquaresStatus.badLambdaParams; return ret; }
942 
943     auto maxAge = settings.maxAge ? settings.maxAge : g ? 3 : 2 * n;
944 
945     if (!tm) tm = delegate(uint count, scope LeastSquaresTask task) pure @nogc nothrow @trusted
946     {
947         foreach(i; 0 .. count)
948             task(1, 0, i);
949     };
950 
951     f(x, y);
952     ++fCalls;
953     residual = dot(y, y);
954     bool fConverged = residual <= maxGoodResidual;
955 
956 
957     bool needJacobian = true;
958     uint age = maxAge;
959 
960     int badPredictions;
961 
962     import core.stdc.stdio;
963 
964     lambda = 0;
965     iterations = 0;
966     T deltaX_dot;
967     T mu = 1;
968     enum T suspiciousMu = 16;
969     status = LeastSquaresStatus.maxIterations;
970     do
971     {
972         if (fConverged)
973         {
974             status = LeastSquaresStatus.fConverged;
975             break;
976         }
977         if (!(lambda <= maxLambda))
978         {
979             status = LeastSquaresStatus.furtherImprovement;
980             break;
981         }
982         if (mu > suspiciousMu && age)
983         {
984             needJacobian = true;
985             age = maxAge;
986             mu = 1;
987         }
988         if (!allLessOrEqual(x, x))
989         {
990             // cast(void) assumePure(&printf)("\n@@@@\nX != X\n@@@@\n");
991             status = LeastSquaresStatus.numericError;
992             break;
993         }
994         if (needJacobian)
995         {
996             needJacobian = false;
997             if (age < maxAge)
998             {
999                 age++;
1000                 auto d = 1 / deltaX_dot;
1001                 axpy(-1, y, mBuffer); // -deltaY
1002                 gemv(1, J, deltaX, 1, mBuffer); //-(f_new - f_old - J_old*h)
1003                 scal(-d, mBuffer);
1004                 ger(1, mBuffer, deltaX, J); //J_new = J_old + u*h'
1005             }
1006             else
1007             {
1008                 age = 0;
1009                 if (g)
1010                 {
1011                     g(x, J);
1012                     gCalls += 1;
1013                 }
1014                 else
1015                 {
1016                     iwork[0 .. n] = 0;
1017                     tm(n, (uint totalThreads, uint threadId, uint j)
1018                         @trusted pure nothrow @nogc
1019                         {
1020                             auto idx = totalThreads >= n ? j : threadId;
1021                             auto p = JJ[idx];
1022                             if (iwork[idx]++ == 0)
1023                                 copy(x, p);
1024 
1025                             auto save = p[j];
1026                             auto xmh = save - jacobianEpsilon;
1027                             auto xph = save + jacobianEpsilon;
1028                             xmh = fmax(xmh, lower[j]);
1029                             xph = fmin(xph, upper[j]);
1030                             auto Jj = J[0 .. $, j];
1031                             if (auto twh = xph - xmh)
1032                             {
1033                                 p[j] = xph;
1034                                 f(p, mBuffer);
1035                                 copy(mBuffer, Jj);
1036                                 p[j] = xmh;
1037                                 f(p, mBuffer);
1038                                 p[j] = save;
1039                                 axpy(-1, mBuffer, Jj);
1040                                 scal(1 / twh, Jj);
1041                             }
1042                             else
1043                             {
1044                                 fill(T(0), Jj);
1045                             }
1046                         });
1047                     fCalls += iwork[0 .. n].sum;
1048                 }
1049             }
1050             gemv(1, J.transposed, y, 0, Jy);
1051             if (!(Jy[Jy.iamax].fabs > gradTolerance))
1052             {
1053                 if (age == 0)
1054                 {
1055                     status = LeastSquaresStatus.gConverged;
1056                     break;
1057                 }
1058                 age = maxAge;
1059                 continue;
1060             }
1061         }
1062 
1063         syrk(Uplo.Lower, 1, J.transposed, 0, JJ);
1064 
1065         if (!(lambda >= minLambda))
1066         {
1067             lambda = 0.001 * JJ.diagonal[JJ.diagonal.iamax];
1068             if (!(lambda >= minLambda))
1069                 lambda = 1;
1070         }
1071 
1072         copy(lower, qpl);
1073         axpy(-1, x, qpl);
1074         copy(upper, qpu);
1075         axpy(-1, x, qpu);
1076         copy(JJ.diagonal, nBuffer);
1077         JJ.diagonal[] += lambda;
1078         if (qpSettings.solveBoxQP(JJ.canonical, Jy, qpl, qpu, deltaX, false, qpwork, iwork, false) != BoxQPStatus.solved)
1079         {
1080             // cast(void) assumePure(&printf)("\n@@@@\n error in solveBoxQP\n@@@@\n");
1081             status = LeastSquaresStatus.numericError;
1082             break;
1083         }
1084 
1085         if (!allLessOrEqual(deltaX, deltaX))
1086         {
1087             // cast(void) assumePure(&printf)("\n@@@@\ndX != dX\n@@@@\n");
1088             status = LeastSquaresStatus.numericError;
1089             break;
1090         }
1091 
1092         copy(nBuffer, JJ.diagonal);
1093 
1094         axpy(1, x, deltaX);
1095         axpy(-1, x, deltaX);
1096 
1097         auto newDeltaX_dot = dot(deltaX, deltaX);
1098 
1099         if (!(newDeltaX_dot.sqrt < maxStep))
1100         {
1101             lambda *= lambdaIncrease * mu;
1102             mu *= 2;
1103             continue;
1104         }
1105 
1106         copy(deltaX, nBuffer);
1107         axpy(1, x, nBuffer);
1108         applyBounds(nBuffer, lower, upper);
1109 
1110         ++fCalls;
1111         f(nBuffer, mBuffer);
1112 
1113         auto trialResidual = dot(mBuffer, mBuffer);
1114 
1115         if (!(trialResidual <= T.infinity))
1116         {
1117             // cast(void) assumePure(&printf)("\n@@@@\n trialResidual = %e\n@@@@\n", trialResidual);
1118             status = LeastSquaresStatus.numericError;
1119             break;
1120         }
1121 
1122         auto improvement = residual - trialResidual;
1123         if (!(improvement > 0))
1124         {
1125             lambda *= lambdaIncrease * mu;
1126             mu *= 2;
1127             continue;
1128         }
1129 
1130         needJacobian = true;
1131         mu = 1;
1132         iterations++;
1133         copy(nBuffer, x);
1134         swap(mBuffer, y);
1135         residual = trialResidual;
1136         fConverged = residual <= maxGoodResidual;
1137         deltaX_dot = newDeltaX_dot;
1138 
1139         symv(Uplo.Lower, 1, JJ, deltaX, 2, Jy); // use Jy as temporal storage
1140         auto predictedImprovement = -dot(Jy, deltaX);
1141 
1142         if (!(predictedImprovement > 0))
1143         {
1144             status = LeastSquaresStatus.furtherImprovement;
1145             break;
1146         }
1147 
1148         auto rho = predictedImprovement / improvement;
1149 
1150         if (rho < minStepQuality)
1151         {
1152             lambda *= lambdaIncrease * mu;
1153             mu *= 2;
1154         }
1155         else
1156         if (rho >= goodStepQuality)
1157         {
1158             lambda = fmax(lambdaDecrease * lambda * mu, minLambda);
1159         }
1160 
1161         // fmax(tolX, tolX * x.nrm2));
1162         if (!(deltaX_dot.sqrt > absTolerance && x.nrm2 > deltaX_dot.sqrt * relTolerance))
1163         {
1164             if (age == 0)
1165             {
1166                 status = LeastSquaresStatus.xConverged;
1167                 break;
1168             }
1169             age = maxAge;
1170             continue;
1171         }
1172     }
1173     while (iterations < maxIterations);
1174 } return ret; }
1175 
1176 pragma(inline, false)
1177 void fill(T, SliceKind kind)(T value, Slice!(T*, 1, kind) x)
1178 {
1179     x[] = value;
1180 }
1181 
1182 pragma(inline, false)
1183 bool allLessOrEqual(T)(
1184     Slice!(const(T)*) a,
1185     Slice!(const(T)*) b,
1186     )
1187 {
1188     import mir.algorithm.iteration: all;
1189     return all!"a <= b"(a, b);
1190 }
1191 
1192 uint normalizeSafety()(uint attrs)
1193 {
1194     if (attrs & FunctionAttribute.system)
1195         attrs &= ~FunctionAttribute.safe;
1196     return attrs;
1197 }
1198 
1199 auto trustedAllAttr(T)(scope return T t) @trusted
1200     if (isFunctionPointer!T || isDelegate!T)
1201 {
1202     enum attrs = (functionAttributes!T & ~FunctionAttribute.system) 
1203         | FunctionAttribute.pure_
1204         | FunctionAttribute.safe
1205         | FunctionAttribute.nogc
1206         | FunctionAttribute.nothrow_;
1207     return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
1208 }
1209 
1210 template isNullableFunction(alias f)
1211 {
1212     enum isNullableFunction = __traits(compiles, { alias F = Unqual!(typeof(f)); auto r = function(ref F e) {e = null;};} );
1213 }