The OpenD Programming Language

1 /++
2 $(H1 Bound Constrained Convex Quadratic Problem Solver)
3 
4 Paper: $(HTTP www.cse.uoi.gr/tech_reports/publications/boxcqp.pdf, BOXCQP: AN ALGORITHM FOR BOUND CONSTRAINED CONVEX QUADRATIC PROBLEMS)
5 
6 Copyright: Copyright © 2020, Symmetry Investments & Kaleidic Associates Advisory Limited
7 Authors:   Ilya Yaroshenko
8 +/
9 module mir.optim.boxcqp;
10 
11 import mir.ndslice.slice: Slice, Canonical;
12 import lapack: lapackint;
13 import mir.math.common: fmin, fmax, sqrt, fabs;
14 
15 /++
16 BOXCQP Exit Status
17 +/
18 enum BoxQPStatus
19 {
20     ///
21     solved,
22     ///
23     numericError,
24     ///
25     maxIterations,
26 }
27 
28 // ? Not compatible with Intel MKL _posvx
29 // version = boxcqp_compact;
30 
31 extern(C) @safe nothrow @nogc
32 {
33     /++
34     +/
35     @safe pure nothrow @nogc
36     size_t mir_box_qp_work_length(size_t n)
37     {
38         version(boxcqp_compact)
39             return n ^^ 2 + n * 8;
40         else
41             return n ^^ 2 * 2 + n * 8;
42     }
43 
44     /++
45     +/
46     @safe pure nothrow @nogc
47     size_t mir_box_qp_iwork_length(size_t n)
48     {
49         return n + (n / lapackint.sizeof + (n % lapackint.sizeof != 0));
50     }
51 }
52 
53 /++
54 BOXCQP Algorithm Settings
55 +/
56 struct BoxQPSettings(T)
57     if (is(T == float) || is(T == double))
58 {
59     /++
60     Relative active constraints tolerance.
61     +/
62     T relTolerance = T.epsilon * 16;
63     /++
64     Absolute active constraints tolerance.
65     +/
66     T absTolerance = T.epsilon * 16;
67     /++
68     Maximal iterations allowed. `0` is used for default value equals to `10 * N + 100`.
69     +/
70     uint maxIterations = 0;
71 }
72 
73 /++
74 Solves:
75     `argmin_x(xPx + qx) : l <= x <= u`
76 Params:
77     P = Positive-definite Matrix, NxN
78     q = Linear component, N
79     l = Lower bounds in `[-inf, +inf$(RPAREN)`, N
80     u = Upper bounds in `$(LPAREN)-inf, +inf]`, N
81     x = solutoin, N
82     settings = Iteration settings (optional)
83 +/
84 @safe pure nothrow @nogc
85 BoxQPStatus solveBoxQP(T)(
86     Slice!(T*, 2, Canonical) P,
87     Slice!(const(T)*) q,
88     Slice!(const(T)*) l,
89     Slice!(const(T)*) u,
90     Slice!(T*) x,
91     BoxQPSettings!T settings = BoxQPSettings!T.init,
92     )
93     if (is(T == float) || is(T == double))
94 {
95     import mir.ndslice.allocation: rcslice;
96     auto n = q.length;
97     auto work = rcslice!T(mir_box_qp_work_length(n));
98     auto iwork = rcslice!lapackint(mir_box_qp_iwork_length(n));
99     auto workS = work.lightScope;
100     auto iworkS = iwork.lightScope;
101     return solveBoxQP(settings, P, q, l, u, x, false, workS, iworkS, true);
102 }
103 
104 /++
105 Solves:
106     `argmin_x(xPx + qx) : l <= x <= u`
107 Params:
108     settings = Iteration settings
109     P = Positive-definite Matrix (in lower triangular part is store), NxN.
110         The upper triangular part (and diagonal) of the matrix is used for temporary data and then can be resotored.
111         Matrix diagonal is always restored.
112     q = Linear component, N
113     l = Lower bounds in `[-inf, +inf$(RPAREN)`, N
114     u = Upper bounds in `$(LPAREN)-inf, +inf]`, N
115     x = solutoin, N
116     unconstrainedSolution = 
117     work = workspace, $(LREF mir_box_qp_work_length)(N)
118     iwork = integer workspace, $(LREF mir_box_qp_iwork_length)(N)
119     restoreUpperP = (optional) restore upper triangular part of P
120 +/
121 @safe pure nothrow @nogc
122 BoxQPStatus solveBoxQP(T)(
123     ref const BoxQPSettings!T settings,
124     Slice!(T*, 2, Canonical) P,
125     Slice!(const(T)*) q,
126     Slice!(const(T)*) l,
127     Slice!(const(T)*) u,
128     Slice!(T*) x,
129     bool unconstrainedSolution,
130     Slice!(T*) work,
131     Slice!(lapackint*) iwork,
132     bool restoreUpperP = true,
133 )
134     if (is(T == float) || is(T == double))
135 in {
136     auto n = q.length;
137     assert(P.length!0 == n);
138     assert(P.length!1 == n);
139     assert(q.length == n);
140     assert(l.length == n);
141     assert(u.length == n);
142     assert(x.length == n);
143     assert(work.length >= mir_box_qp_work_length(n));
144     assert(iwork.length >= mir_box_qp_iwork_length(n));
145 }
146 do {
147     import mir.blas: dot, copy;
148     import mir.lapack: posvx;
149     import mir.math.sum;
150     import mir.ndslice.slice: sliced;
151     import mir.ndslice.topology: canonical, diagonal;
152 
153     enum Flag : byte
154     {
155         l = -1,
156         s = 0,
157         u = 1,
158     }
159 
160     auto n = q.length;
161 
162     if (n == 0)
163         return BoxQPStatus.solved;
164 
165     auto bwork = iwork[n .. $];
166     iwork = iwork[0 .. n];
167 
168     if (!unconstrainedSolution)
169     {
170         auto buffer = work;
171         auto Pdiagonal = buffer[0 .. n]; buffer = buffer[n .. $];
172         auto scaling = buffer[0 .. n]; buffer = buffer[n .. $];
173         auto b = buffer[0 .. n]; buffer = buffer[n .. $];
174         auto lapackWorkSpace = buffer[0 .. n * 3]; buffer = buffer[n * 3 .. $];
175         auto F = buffer[0 .. n ^^ 2].sliced(n, n); buffer = buffer[n ^^ 2 .. $];
176 
177         version(boxcqp_compact)
178         {
179             foreach(i; 1 .. n)
180                 copy(P[i, 0 .. i], P[0 .. i, i]);
181             copy(P.diagonal, Pdiagonal);
182             alias A = P;
183         }
184         else
185         {
186             auto A = buffer[0 .. n ^^ 2].sliced(n, n); buffer = buffer[n ^^ 2 .. $];
187             foreach(i; 0 .. n)
188                 copy(P[i, 0 .. i + 1], A[0 .. i + 1, i]);
189         }
190 
191         b[] = -q;
192         char equed;
193         T rcond, ferr, berr;
194         auto info = posvx('E', 'L',
195             A.canonical,
196             F.canonical,
197             equed,
198             scaling,
199             b,
200             x,
201             rcond,
202             ferr,
203             berr,
204             lapackWorkSpace,
205             iwork);
206 
207         version(boxcqp_compact)
208         {
209             copy(Pdiagonal, P.diagonal);
210         }
211 
212         if (info != 0 && info != n + 1)
213             return BoxQPStatus.numericError;
214     }
215 
216     foreach (i; 0 .. n)
217         if (!(l[i] <= x[i] && x[i] <= u[i]))
218             goto Start;
219     return BoxQPStatus.solved;
220 
221 Start:
222     auto flags = (()@trusted=>(cast(Flag*)bwork.ptr).sliced(n))();
223 
224     auto maxIterations = cast()settings.maxIterations;
225     if (!maxIterations)
226         maxIterations = cast(uint)n * 10 + 100; // fix
227 
228     auto la  = work[0 .. n]; work = work[n .. $];
229     auto mu  = work[0 .. n]; work = work[n .. $];
230 
231     la[] = 0;
232     mu[] = 0;
233 
234     MainLoop: foreach (step; 0 .. maxIterations)
235     {
236         {
237             size_t s;
238 
239             with(settings) foreach (i; 0 .. n)
240             {
241                 auto xl = x[i] - l[i];
242                 auto ux = u[i] - x[i];
243                 if (xl < 0 || xl < relTolerance + absTolerance * l[i].fabs && la[i] >= 0)
244                 {
245                     flags[i] = Flag.l;
246                     x[i] = l[i];
247                     mu[i] = 0;
248                 }
249                 else
250                 if (ux < 0 || ux < relTolerance + absTolerance * u[i].fabs && mu[i] >= 0)
251                 {
252                     flags[i] = Flag.u;
253                     x[i] = u[i];
254                     la[i] = 0;
255                 }
256                 else
257                 {
258                     flags[i] = Flag.s;
259                     iwork[s++] = cast(lapackint)i;
260                     mu[i]  = 0;
261                     la[i]  = 0;
262                 }
263             }
264 
265             if (s == n)
266                 break;
267 
268             {
269                 auto SIWorkspace = iwork[0 .. s];
270                 auto buffer = work;
271                 auto scaling = buffer[0 .. s]; buffer = buffer[s .. $];
272                 auto sX = buffer[0 .. s]; buffer = buffer[s .. $];
273                 auto b = buffer[0 .. s]; buffer = buffer[s .. $];
274                 auto lapackWorkSpace = buffer[0 .. s * 3]; buffer = buffer[s * 3 .. $];
275                 auto F = buffer[0 .. s ^^ 2].sliced(s, s); buffer = buffer[s ^^ 2 .. $];
276 
277                 version(boxcqp_compact)
278                     auto A = P[0 .. $ - 1, 1 .. $][$ - s .. $, $ - s .. $];
279                 else
280                     auto A = buffer[0 .. s ^^ 2].sliced(s, s); buffer = buffer[s ^^ 2 .. $];
281 
282                 foreach (ii, i; SIWorkspace.field)
283                 {
284                     Summator!(T, Summation.kbn) sum = q[i];
285                     uint jj;
286                     {
287                         auto Aii = A[0 .. $, ii];
288                         auto Pi = P[i, 0 .. $];
289                         foreach (j; 0 .. i)
290                             if (flags[j])
291                                 sum += Pi[j] * (flags[j] < 0 ? l : u)[j];
292                             else
293                                 Aii[jj++] = Pi[j];
294                     }
295                     {
296                         auto Aii = A[ii, 0 .. $];
297                         auto Pi = P[0 .. $, i];
298                         foreach (j; i .. n)
299                             if (flags[j])
300                                 sum += Pi[j] * (flags[j] < 0 ? l : u)[j];
301                             else
302                                 Aii[jj++] = Pi[j];
303                     }
304                     b[ii] = -sum.sum;
305                 }
306 
307                 {
308                     char equed;
309                     T rcond, ferr, berr;
310                     auto info = posvx('E', 'L',
311                         A.canonical,
312                         F.canonical,
313                         equed,
314                         scaling,
315                         b,
316                         sX,
317                         rcond,
318                         ferr,
319                         berr,
320                         lapackWorkSpace,
321                         SIWorkspace);
322                     
323                     if (info != 0 && info != s + 1)
324                         return BoxQPStatus.numericError;
325                 }
326 
327                 size_t ii;
328                 foreach (i; 0 .. n) if (flags[i] == Flag.s)
329                     x[i] = sX[ii++];
330             }
331         }
332 
333         foreach (i; 0 .. n) if (flags[i])
334         {
335             auto val = dot!T(P[i, 0 .. i], x[0 .. i]) + dot!T(P[i .. $, i], x[i .. $]) + q[i];
336             (flags[i] < 0 ? la : mu)[i] = flags[i] < 0 ? val : -val;
337         }
338 
339         foreach (i; 0 .. n)
340         {
341             final switch (flags[i])
342             {
343                 case Flag.l: if (la[i] >= 0) continue; continue MainLoop;
344                 case Flag.u: if (mu[i] >= 0) continue; continue MainLoop;
345                 case Flag.s: if (x[i] >= l[i] && x[i] <= u[i]) continue; continue MainLoop;
346             }
347         }
348 
349         applyBounds(x, l, u);
350 
351         version(none)
352         {
353             import std.traits, std.meta;
354             static auto assumePure(T)(T t)
355             if (isFunctionPointer!T || isDelegate!T)
356             {
357                 enum attrs = functionAttributes!T | FunctionAttribute.pure_;
358                 return cast(SetFunctionAttributes!(T, functionLinkage!T, attrs)) t;
359             }
360 
361             import core.stdc.stdio;
362             (()@trusted => cast(void) assumePure(&printf)("#### BOXCQP iters = %d\n", step + 1))();
363         }
364 
365         if (restoreUpperP)
366         {
367             while(P.length > 1)
368             {
369                 copy(P[1 .. $, 0], P[0, 1 .. $]);
370                 P.popFront!1;
371                 P.popFront!0;
372             }
373         }
374 
375         return BoxQPStatus.solved;
376     }
377 
378     return BoxQPStatus.maxIterations;
379 }
380 
381 ///
382 version(mir_optim_test)
383 unittest
384 {
385     import mir.ndslice;
386     import mir.algorithm.iteration;
387     import mir.math.common;
388 
389     auto P = [
390         [ 2.0, -1, 0],
391         [-1.0, 2, -1],
392         [ 0.0, -1, 2],
393     ].fuse.canonical;
394 
395     auto q = [3.0, -7, 5].sliced;
396     auto l = [-100.0, -2, 1].sliced;
397     auto u = [100.0, 2, 1].sliced;
398     auto x = slice!double(q.length);
399 
400     solveBoxQP(P, q, l, u, x);
401     assert(x.equal!approxEqual([-0.5, 2, 1]));
402 }
403 
404 package(mir) void applyBounds(T)(Slice!(T*) x, Slice!(const(T)*) l, Slice!(const(T)*) u)
405 {
406     pragma(inline, false);
407     import mir.math.common: fmin, fmax;
408     foreach (i; 0 .. x.length)
409         x[i] = x[i].fmin(u[i]).fmax(l[i]);
410 }