The OpenD Programming Language

1 /++
2 This is a submodule of $(MREF mir,graph).
3 
4 Tarjan's strongly connected components algorithm.
5 
6 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
7 Copyright: 2020 Ilia Ki, Kaleidic Associates Advisory Limited, Symmetry Investments
8 Authors: Ilia Ki
9 
10 Macros:
11 SUBREF = $(REF_ALTTEXT $(TT $2), $2, mir, ndslice, $1)$(NBSP)
12 T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
13 +/
14 
15 module mir.graph.tarjan;
16 
17 import std.traits;
18 
19 import mir.math.common: fmamath;
20 
21 @fmamath:
22 
23 
24 /++
25 Classic Tarjan's strongly connected components algorithm.
26 
27 Tarjan's algorithm is an algorithm in graph theory for finding the strongly connected components of a graph.
28 It runs in linear time, matching the time bound for alternative methods including Kosaraju's algorithm and the path-based strong component algorithm.
29 
30 The implementation is loop based. It does not use recursion and does not have stack overflow issues.
31 
32 Complexity: worst-case `O(|V| + |E|)`.
33 
34 Params:
35     RC = nogc mode, refcounted output
36     graph = components (ndslice) sorted in the direction of traversal of the graph. Each component is an array of indeces.
37 Returns:
38     components (ndslice of arrays of indices)
39 
40 Note:
41     The implementation returns components sorted in the direction of traversal of the graph.
42     $(NOTE Most of other Tarjan implementations returns reverse order.)
43 
44 See_also:
45     $(SUBREF utility, graph)
46 +/
47 pragma(inline, false)
48 auto tarjan(bool RC = false, G, I = Unqual!(ForeachType!(ForeachType!G)))(G graph)
49     if (isUnsigned!I)
50 {
51     import mir.utility: min;
52 
53     static if (I.sizeof >= uint.sizeof)
54         alias S = size_t;
55     else
56         alias S = uint;
57 
58     enum undefined = I.max;
59 
60     static struct IndexNode
61     {
62         I index;
63         I lowlink;
64 
65 @property:
66 
67         bool isRoot()()
68         {
69             return index == lowlink;
70         }
71 
72         bool isUndefined()()
73         {
74             return index == undefined;
75         }
76     }
77 
78     static struct LoopNode
79     {
80         union
81         {
82             struct
83             {
84                 I i;
85                 I j;
86             }
87             S index;
88         }
89     }
90 
91     
92     sizediff_t stackIndex;
93     sizediff_t backStackIndex = graph.length;
94     sizediff_t componentBackStackIndex = graph.length + 1;
95 
96     static if (RC)
97     {
98         import mir.rc.array;
99         auto onStack = RCArray!bool(graph.length);
100         auto stack = RCArray!I(graph.length, true);
101         auto indeces = RCArray!IndexNode(graph.length, true);
102         auto loopStack = RCArray!LoopNode(componentBackStackIndex, true);
103     }
104     else
105     {
106         I[] stack;
107         IndexNode[] indeces;
108         LoopNode[] loopStack;
109 
110         bool[] onStack = new bool[graph.length];
111         if (__ctfe)
112         {
113 
114             stack = new I[graph.length];
115             indeces = new IndexNode[graph.length];
116             loopStack = new LoopNode[componentBackStackIndex];
117         }
118         else
119         {
120             () @trusted {
121                 import std.array: uninitializedArray;
122 
123                 stack = uninitializedArray!(I[])(graph.length);
124                 indeces = uninitializedArray!(IndexNode[])(graph.length);
125                 loopStack = uninitializedArray!(LoopNode[])(componentBackStackIndex);
126             } ();
127         }
128     }
129 
130     foreach(ref node; indeces)
131         node.index = undefined;
132 
133     I index;
134     foreach(size_t v; 0u .. graph.length)
135     {
136         if (indeces[v].isUndefined)
137         {
138             sizediff_t loopStackIndex;
139         loop:
140             // Set the depth index for v to the smallest unused index
141             indeces[v].index = cast(I) index;
142             indeces[v].lowlink = cast(I) index;
143             index++;
144             stack[stackIndex++] = cast(I) v;
145             onStack[v] = true;
146 
147             // Consider successors of v
148             auto e = graph[v];
149             I w;
150             size_t wi;
151 
152             for (; wi < e.length; wi++)
153             {
154                 w = e[wi];
155                 if (onStack[w])
156                 {
157                     // Successor w is in stack S and hence in the current SCC
158                     // If w is not on stack, then (v, w) is a cross-edge in the DFS tree and must be ignored
159                     // Note: The next line may look odd - but is correct.
160                     // It says w.index not w.lowlink; that is deliberate and from the original paper
161                     indeces[v].lowlink = min(indeces[v].lowlink, indeces[w].index);
162                     continue;
163                 }
164                 if (indeces[w].isUndefined)
165                 {
166                     // Successor w has not yet been visited; recurse on it
167                     // strongconnect(w)
168                     assert(loopStackIndex < loopStack.length);
169                     loopStack[loopStackIndex] = LoopNode(cast(I) v, cast(I) wi);
170                     ++loopStackIndex;
171                     assert(componentBackStackIndex > loopStackIndex);
172                     v = e[wi];
173                     goto loop;
174                 retRec:
175                     v = loopStack[loopStackIndex].i;
176                     wi = loopStack[loopStackIndex].j;
177                     e = graph[v];
178                     w = e[wi];
179                     indeces[v].lowlink = min(indeces[v].lowlink, indeces[w].lowlink);
180                 }
181             }
182 
183             // If v is a root node, pop the stack and generate an SCC
184             if (indeces[v].isRoot)
185             {
186                 // start a new strongly connected component
187                 do
188                 {
189                     assert(stackIndex > 0);
190                     assert(backStackIndex > 0);
191                     // add w to current strongly connected component
192                     --backStackIndex;
193                     --stackIndex;
194                     w = stack[backStackIndex] = stack[stackIndex];
195                     onStack[w] = false;
196                 }
197                 while (w != v);
198                 
199                 // output the current strongly connected component
200                 assert(componentBackStackIndex > loopStackIndex);
201                 --componentBackStackIndex;
202                 loopStack[componentBackStackIndex].index = cast(S) backStackIndex;
203             }
204             if (--loopStackIndex >= 0)
205                 goto retRec;
206         }
207     }
208 
209     const indexLength = graph.length + 1 - componentBackStackIndex + 1;
210     static if (RC)
211     {
212         auto pairwiseIndex = RCArray!S(indexLength, true);
213     }
214     else
215     {
216         S[] pairwiseIndex;
217         if (__ctfe)
218         {
219             pairwiseIndex = new S[indexLength];
220         }
221         else
222         {
223             () @trusted {
224                 import std.array: uninitializedArray;
225                 pairwiseIndex = uninitializedArray!(S[])(indexLength);
226             } ();
227         }
228     }
229     foreach (i, ref e; loopStack[][componentBackStackIndex .. $])
230     {
231         pairwiseIndex[i] = e.index;
232     }
233     pairwiseIndex[$ - 1] = cast(I) graph.length;
234 
235     import mir.ndslice.topology: chopped;
236     static if (RC)
237     {
238         import core.lifetime: move;
239         return chopped(RCI!I(stack.move), pairwiseIndex.asSlice);
240     }
241     else
242     {
243         return (()@trusted {return stack.ptr; }()).chopped(pairwiseIndex);
244     }
245 }
246 
247 /++
248 ------
249         4 <- 5 <- 6 -------> 7 -> 8 -> 11
250         |    ^   ^           ^    |
251         v    |   |           |    |
252   0 -> 1 -> 2 -> 3 -> 10     9 <---
253 ------
254 +/
255 pure version(mir_test) unittest
256 {
257     import mir.graph;
258     import mir.graph.tarjan;
259 
260     GraphSeries!(string, uint) gs = [
261         "00": ["01"],
262         "01": ["02"],
263         "02": ["05", "03"],
264         "03": ["06", "10"],
265         "04": ["01"],
266         "05": ["04"],
267         "06": ["05", "07"],
268         "07": ["08"],
269         "08": ["09", "11"],
270         "09": ["07"],
271         "10": [],
272         "11": [],
273     ].graphSeries;
274 
275 
276     static immutable result = [
277         [0],
278         [1, 2, 5, 4, 3, 6],
279         [10],
280         [7, 8, 9],
281         [11]];
282 
283     // chec GC interface
284     auto components = gs.data.tarjan;
285     assert(components == result);
286     // check @nogc interface
287     // Note: The lambda function is used here to show @nogc mode explicitly.
288     auto rccomponents = (() @nogc => gs.data.tarjan!true )();
289     assert(rccomponents == result);
290 }
291 
292 /++
293 Tests that the graph `0 -> 1 -> 2 -> 3 -> 4` returns 4 components.
294 +/
295 pure version(mir_test) unittest
296 {
297     import mir.graph;
298     import mir.graph.tarjan;
299 
300     GraphSeries!(char, uint) gs = [
301         'a': ['b'],
302         'b': ['c'],
303         'c': ['d'],
304         'd': ['q'],
305         'q': [],
306     ].graphSeries;
307 
308     auto scc = gs.data.tarjan;
309 
310     assert(scc == [[0], [1], [2], [3], [4]]);
311 }
312 
313 /++
314 ----
315  0 <- 2 <-- 5 <--> 6
316  |  ^ ^ ^___       
317  v_|  |     |      _ 
318  1 <- 3 <-> 4 <-- 7_|
319 ----
320 +/
321 pure version(mir_test) unittest
322 {
323     import mir.graph;
324     import mir.graph.tarjan;
325 
326     auto gs = [
327         0: [1],
328         1: [2],
329         2: [0],
330         3: [1, 2, 4],
331         4: [3, 2],
332         5: [2, 6],
333         6: [5],
334         7: [4, 7],
335     ].graphSeries;
336 
337     auto components = gs.data.tarjan;
338 
339     assert(components == [
340         [7],
341         [5, 6],
342         [3, 4],
343         [0, 1, 2],
344     ]);
345 }
346 
347 /++
348 -----
349  2 <-> 1
350  |    ^
351  v_0 /
352    
353 -----
354 +/
355 pure version(mir_test) unittest
356 {
357     import mir.graph;
358     import mir.graph.tarjan;
359 
360     auto gs = [
361         0: [1],
362         1: [2],
363         2: [0, 1],
364     ].graphSeries;
365 
366     auto components = gs.data.tarjan;
367 
368     assert(components == [[0, 1, 2]]);
369 }
370 
371 /++
372 Tests that a strongly connected graph, where components have
373 to get through previously visited components to get to the
374 graph root works properly
375 
376 This test demonstrates a hard to detect bug, where vertices
377 were being marked 'off-stack' after they were first visited,
378 not when they were actually removed from the stack
379 +/
380 pure version(mir_test) unittest
381 {
382     import mir.graph;
383     import mir.graph.tarjan;
384 
385     auto root = 0;
386     auto lvl1 = [1,2,3,4,5,6,7,8,9,10];
387     auto lvl2 = [11,12,13,14,15,16,17,18,19,20];
388 
389     int[][int] aar;
390     aar[root] = lvl1;
391     foreach(int v; lvl1)
392         aar[v] = lvl2;
393     foreach(int v; lvl2)
394         aar[v] = [root];
395 
396     auto gs = aar.graphSeries;
397 
398     auto components = gs.data.tarjan;
399 
400     assert(components == [[root] ~ [lvl1[0]] ~ lvl2 ~ lvl1[1 .. $]]);
401 }