xref: /oneTBB/examples/graph/som/som_graph.cpp (revision e1b24895)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 //
18 // Self-organizing map in TBB flow::graph
19 //
20 //   This is an example of the use of cancellation in a graph.  After a point in searching for
21 //   the best match for an example, two examples are looked for simultaneously.  When the
22 //   earlier example is found and the update radius is determined, the affected searches
23 //   for the subsequent example are cancelled, and after the update they are restarted.
24 //   As the update radius shrinks fewer searches are cancelled, and by the last iterations
25 //   virtually all the work done for the speculating example is useful.
26 //
27 // first, a simple implementation with only one example vector
28 // at a time.
29 //
30 // we will do a color map (the simple example.)
31 //
32 //  graph algorithm
33 //
34 //       for some number of iterations
35 //           update radius r, weight of change L
36 //           for each example V
37 //               use graph to find BMU
38 //               for each part of map within radius of BMU W
39 //                   update vector:  W(t+1) = W(t) + w(dist)*L*(V - W(t))
40 
41 #ifndef NOMINMAX
42 #define NOMINMAX
43 #endif // NOMINMAX
44 
45 #include <algorithm>
46 
47 #define _MAIN_C_ 1
48 #include "som.hpp"
49 
50 #include "oneapi/tbb/flow_graph.h"
51 #include "oneapi/tbb/blocked_range2d.h"
52 #include "oneapi/tbb/tick_count.h"
53 #include "oneapi/tbb/task_arena.h"
54 #include "oneapi/tbb/global_control.h"
55 
56 #include "common/utility/utility.hpp"
57 #include "common/utility/get_default_num_threads.hpp"
58 
59 #define RED   0
60 #define GREEN 1
61 #define BLUE  2
62 
63 static int xranges = 1;
64 static int yranges = 1;
65 static int xsize = -1;
66 static int ysize = -1;
67 
68 static int global_i = 0;
69 static int speculation_start;
70 
71 #if EXTRA_DEBUG
72 std::vector<int> cancel_count;
73 std::vector<int> extra_count;
74 std::vector<int> missing_count;
75 std::vector<int> canceled_before;
76 #endif
77 std::vector<int> function_node_execs;
78 static int xRangeMax = 3;
79 static int yRangeMax = 3;
80 static bool dont_speculate = false;
81 static search_result_type last_update;
82 
83 class BMU_search_body {
84     SOMap &my_map;
85     subsquare_type my_square;
86     int &fn_tally;
87 
88 public:
BMU_search_body(SOMap & _m,subsquare_type & _sq,int & fnt)89     BMU_search_body(SOMap &_m, subsquare_type &_sq, int &fnt)
90             : my_map(_m),
91               my_square(_sq),
92               fn_tally(fnt) {}
BMU_search_body(const BMU_search_body & other)93     BMU_search_body(const BMU_search_body &other)
94             : my_map(other.my_map),
95               my_square(other.my_square),
96               fn_tally(other.fn_tally) {}
operator ()(const SOM_element s)97     search_result_type operator()(const SOM_element s) {
98         int my_x;
99         int my_y;
100         double min_dist = my_map.BMU_range(s, my_x, my_y, my_square);
101         ++fn_tally; // count how many times this function_node executed
102         return search_result_type(min_dist, my_x, my_y);
103     }
104 };
105 
106 typedef oneapi::tbb::flow::function_node<SOM_element, search_result_type> search_node;
107 typedef oneapi::tbb::flow::broadcast_node<SOM_element> b_node;
108 typedef std::vector<search_node *> search_node_vector_type;
109 typedef std::vector<search_node_vector_type> search_node_array_type;
110 typedef std::vector<oneapi::tbb::flow::graph *> graph_vector_type;
111 typedef std::vector<graph_vector_type> graph_array_type;
112 
113 #define SPECULATION_CNT 2
114 
115 oneapi::tbb::flow::graph *g[SPECULATION_CNT]; // main graph; there should only be one per epoch
116 b_node *send_to[SPECULATION_CNT]; // broadcast node to send exemplar to all function_nodes
117 oneapi::tbb::flow::queue_node<search_result_type>
118     *q[SPECULATION_CNT]; // queue for function nodes to put their results in
119 // each function_node should have its own graph
120 search_node_array_type *s_array[SPECULATION_CNT]; // 2d array of function nodes
121 graph_array_type *g_array[SPECULATION_CNT]; // 2d array of graphs
122 
123 // All graphs must locate in the same arena.
construct_graph(oneapi::tbb::task_arena & ta)124 oneapi::tbb::flow::graph *construct_graph(oneapi::tbb::task_arena &ta) {
125     oneapi::tbb::flow::graph *result;
126     ta.execute([&result] {
127         result = new oneapi::tbb::flow::graph();
128     });
129     return result;
130 }
131 
132 // build a set of SPECULATION_CNT graphs, each of which consists of a broadcast_node,
133 //    xranges x yranges function_nodes, and one queue_node for output.
134 //    once speculation starts, if i % SPECULATION_CNT is the current graph, (i+1) % SPECULATION_CNT
135 //    is the first speculation, and so on.
build_BMU_graph(SOMap & map1,oneapi::tbb::task_arena & ta)136 void build_BMU_graph(SOMap &map1, oneapi::tbb::task_arena &ta) {
137     // build current graph
138     xsize = ((int)map1.size() + xranges - 1) / xranges;
139     ysize = ((int)map1[0].size() + yranges - 1) / yranges;
140     function_node_execs.clear();
141     function_node_execs.reserve(xranges * yranges + 1);
142     for (int i = 0; i < xranges * yranges + 1; ++i)
143         function_node_execs.push_back(0);
144 
145     for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) {
146         g[scnt] = construct_graph(ta);
147         send_to[scnt] = new b_node(*(g[scnt])); // broadcast node to the function_nodes
148         q[scnt] = new oneapi::tbb::flow::queue_node<search_result_type>(*(g[scnt])); // output queue
149 
150         // create the function_nodes, tie to the graph
151         s_array[scnt] = new search_node_array_type;
152         s_array[scnt]->reserve(xranges);
153         g_array[scnt] = new graph_array_type;
154         g_array[scnt]->reserve(xranges);
155         for (int i = 0; i < (int)map1.size(); i += xsize) {
156             int xindex = i / xsize;
157             s_array[scnt]->push_back(search_node_vector_type());
158 #if EXTRA_DEBUG
159             if (s_array[scnt]->size() != xindex + 1) {
160                 printf("Error; s_array[%d]->size() == %d, xindex== %d\n",
161                        scnt,
162                        (int)(s_array[scnt]->size()),
163                        xindex);
164             }
165 #endif
166             (*s_array[scnt])[xindex].reserve(yranges);
167             g_array[scnt]->push_back(graph_vector_type());
168             (*g_array[scnt])[xindex].reserve(yranges);
169             for (int j = 0; j < (int)map1[0].size(); j += ysize) {
170                 int offset = (i / xsize) * yranges + (j / ysize);
171                 int xmax = (i + xsize) > (int)map1.size() ? (int)map1.size() : i + xsize;
172                 int ymax = (j + ysize) > (int)map1[0].size() ? (int)map1[0].size() : j + ysize;
173                 subsquare_type sst(i, xmax, 1, j, ymax, 1);
174                 BMU_search_body bb(map1, sst, function_node_execs[offset]);
175                 oneapi::tbb::flow::graph *g_local = construct_graph(ta);
176                 search_node *s =
177                     new search_node(*g_local, oneapi::tbb::flow::serial, bb); // copies Body
178                 (*g_array[scnt])[xindex].push_back(g_local);
179                 (*s_array[scnt])[xindex].push_back(s);
180                 oneapi::tbb::flow::make_edge(*(send_to[scnt]),
181                                              *s); // broadcast_node -> function_node
182                 oneapi::tbb::flow::make_edge(*s, *(q[scnt])); // function_node -> queue_node
183             }
184         }
185     }
186 }
187 
188 // Wait for the 2D array of flow::graphs.
wait_for_all_graphs(int cIndex)189 void wait_for_all_graphs(int cIndex) { // cIndex ranges over [0 .. SPECULATION_CNT - 1]
190     for (int x = 0; x < xranges; ++x) {
191         for (int y = 0; y < yranges; ++y) {
192             (*g_array[cIndex])[x][y]->wait_for_all();
193         }
194     }
195 }
196 
destroy_BMU_graph()197 void destroy_BMU_graph() {
198     for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) {
199         for (int i = 0; i < (int)(*s_array[scnt]).size(); ++i) {
200             for (int j = 0; j < (int)(*s_array[scnt])[i].size(); ++j) {
201                 delete (*s_array[scnt])[i][j];
202                 delete (*g_array[scnt])[i][j];
203             }
204         }
205         (*s_array[scnt]).clear();
206         delete s_array[scnt];
207         (*g_array[scnt]).clear();
208         delete g_array[scnt];
209         delete q[scnt];
210         delete send_to[scnt];
211         delete g[scnt];
212     }
213 }
214 
find_subrange_overlap(int const & xval,int const & yval,double const & radius,int & xlow,int & xhigh,int & ylow,int & yhigh)215 void find_subrange_overlap(int const &xval,
216                            int const &yval,
217                            double const &radius,
218                            int &xlow,
219                            int &xhigh,
220                            int &ylow,
221                            int &yhigh) {
222     xlow = int((xval - radius) / xsize);
223     xhigh = int((xval + radius) / xsize);
224     ylow = int((yval - radius) / ysize);
225     yhigh = int((yval + radius) / ysize);
226     // circle may fall partly outside map
227     if (xlow < 0)
228         xlow = 0;
229     if (xhigh >= xranges)
230         xhigh = xranges - 1;
231     if (ylow < 0)
232         ylow = 0;
233     if (yhigh >= yranges)
234         yhigh = yranges - 1;
235 #if EXTRA_DEBUG
236     if (xlow >= xranges)
237         printf(" Error *** xlow == %d\n", xlow);
238     if (xhigh < 0)
239         printf("Error *** xhigh == %d\n", xhigh);
240     if (ylow >= yranges)
241         printf("Error *** ylow == %d\n", ylow);
242     if (yhigh < 0)
243         printf("Error *** yhigh == %d\n", yhigh);
244 #endif
245 }
246 
overlap(int & xval,int & yval,search_result_type & sr)247 bool overlap(int &xval, int &yval, search_result_type &sr) {
248     int xlow, xhigh, ylow, yhigh;
249     find_subrange_overlap(
250         std::get<XV>(sr), std::get<YV>(sr), std::get<RADIUS>(sr), xlow, xhigh, ylow, yhigh);
251     return xval >= xlow && xval <= xhigh && yval >= ylow && yval <= yhigh;
252 }
253 
cancel_submaps(int & xval,int & yval,double & radius,int indx)254 void cancel_submaps(int &xval, int &yval, double &radius, int indx) {
255     int xlow;
256     int xhigh;
257     int ylow;
258     int yhigh;
259     find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh);
260     for (int x = xlow; x <= xhigh; ++x) {
261         for (int y = ylow; y <= yhigh; ++y) {
262             (*g_array[indx])[x][y]->cancel();
263         }
264     }
265 #if EXTRA_DEBUG
266     ++cancel_count[(xhigh - xlow + 1) * (yhigh - ylow + 1)];
267 #endif
268 }
269 
restart_submaps(int & xval,int & yval,double & radius,int indx,SOM_element & vector)270 void restart_submaps(int &xval, int &yval, double &radius, int indx, SOM_element &vector) {
271     int xlow;
272     int xhigh;
273     int ylow;
274     int yhigh;
275     find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh);
276     for (int x = xlow; x <= xhigh; ++x) {
277         for (int y = ylow; y <= yhigh; ++y) {
278             // have to reset the graph
279             (*g_array[indx])[x][y]->reset();
280             // and re-submit the exemplar for search.
281             (*s_array[indx])[x][y]->try_put(vector);
282         }
283     }
284 }
285 
graph_BMU(int indx)286 search_result_type graph_BMU(int indx) { // indx ranges over [0 .. SPECULATION_CNT -1]
287     wait_for_all_graphs(indx); // wait for the array of subgraphs
288     (g[indx])->wait_for_all();
289     std::vector<search_result_type> all_srs(xRangeMax * yRangeMax,
290                                             search_result_type(DBL_MAX, -1, -1));
291 #if EXTRA_DEBUG
292     int extra_computations = 0;
293 #endif
294     search_result_type sr;
295     search_result_type min_sr;
296     std::get<RADIUS>(min_sr) = DBL_MAX;
297     int result_count = 0;
298     while ((q[indx])->try_get(sr)) {
299         ++result_count;
300         // figure which submap this came from
301         int x = std::get<XV>(sr) / xsize;
302         int y = std::get<YV>(sr) / ysize;
303 #if EXTRA_DEBUG
304         if (x < 0 || x >= xranges)
305             printf(" ###  x value out of range (%d)\n", x);
306         if (y < 0 || y >= yranges)
307             printf(" ###  y value out of range (%d)\n", y);
308 #endif
309         int offset = x * yranges + y; // linearized subscript
310 #if EXTRA_DEBUG
311         if (std::get<RADIUS>(all_srs[offset]) !=
312             DBL_MAX) { // we've already got a result from this subsquare
313             ++extra_computations;
314         }
315         else if (std::get<XV>(all_srs[offset]) != -1) {
316             if (extra_debug)
317                 printf("More than one cancellation of [%d,%d] iteration %d\n", x, y, global_i);
318         }
319 #endif
320         all_srs[offset] = sr;
321         if (std::get<RADIUS>(sr) < std::get<RADIUS>(min_sr))
322             min_sr = sr;
323         else if (std::get<RADIUS>(sr) == std::get<RADIUS>(min_sr)) {
324             if (std::get<XV>(sr) < std::get<XV>(min_sr)) {
325                 min_sr = sr;
326             }
327             else if ((std::get<XV>(sr) == std::get<XV>(min_sr) &&
328                       std::get<YV>(sr) < std::get<YV>(min_sr))) {
329                 min_sr = sr;
330             }
331         }
332     }
333 #if EXTRA_DEBUG
334     if (result_count != xranges * yranges + extra_computations) {
335         // we are missing at least one of the expected results.  Tally the missing values
336         for (int i = 0; i < xranges * yranges; ++i) {
337             if (std::get<RADIUS>(all_srs[i]) == DBL_MAX) {
338                 // i == x*yranges + y
339                 int xval = i / yranges;
340                 int yval = i % yranges;
341                 bool received_cancel_result = std::get<XV>(all_srs[i]) != -1;
342                 if (overlap(xval, yval, last_update)) {
343                     // we have previously canceled this subsquare.
344                     printf("No result for [%d,%d] which was canceled(%s)\n",
345                            xval,
346                            yval,
347                            received_cancel_result ? "T" : "F");
348                     ++canceled_before[i];
349                 }
350                 else {
351                     printf("No result for [%d,%d] which was not canceled(%s)\n",
352                            xval,
353                            yval,
354                            received_cancel_result ? "T" : "F");
355                 }
356                 ++missing_count[i];
357             }
358         }
359     }
360     if (extra_computations)
361         ++extra_count[extra_computations];
362 #endif
363     return min_sr;
364     // end of one epoch
365 }
366 
graph_teach(SOMap & map1,teaching_vector_type & in,oneapi::tbb::task_arena & ta)367 void graph_teach(SOMap &map1, teaching_vector_type &in, oneapi::tbb::task_arena &ta) {
368     build_BMU_graph(map1, ta);
369 #if EXTRA_DEBUG
370     cancel_count.clear();
371     extra_count.clear();
372     missing_count.clear();
373     canceled_before.clear();
374     cancel_count.reserve(xRangeMax * yRangeMax + 1);
375     extra_count.reserve(xRangeMax * yRangeMax + 1);
376     missing_count.reserve(xRangeMax * yRangeMax + 1);
377     canceled_before.reserve(xRangeMax * yRangeMax + 1);
378     for (int i = 0; i < xRangeMax * yRangeMax + 1; ++i) {
379         cancel_count.push_back(0);
380         extra_count.push_back(0);
381         missing_count.push_back(0);
382         canceled_before.push_back(0);
383     }
384 #endif
385     // normally the training would pick random exemplars to teach the SOM.  We need
386     // the process to be reproducible, so we will pick the exemplars in order, [0, in.size())
387     int next_j = 0;
388     for (int epoch = 0; epoch < nPasses; ++epoch) {
389         global_i = epoch;
390         bool canceled_submaps = false;
391         int j = next_j; // try to make reproducible
392         next_j = (epoch + 1) % in.size();
393         search_result_type min_sr;
394         if (epoch < speculation_start) {
395             (send_to[epoch % SPECULATION_CNT])->try_put(in[j]);
396         }
397         else if (epoch == speculation_start) {
398             (send_to[epoch % SPECULATION_CNT])->try_put(in[j]);
399             if (epoch < nPasses - 1) {
400                 (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]);
401             }
402         }
403         else if (epoch < nPasses - 1) {
404             (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]);
405         }
406         min_sr = graph_BMU(epoch % SPECULATION_CNT); //calls wait_for_all()
407         double min_distance = std::get<0>(min_sr);
408         double radius = max_radius * exp(-(double)epoch * radius_decay_rate);
409         double learning_rate = max_learning_rate * exp(-(double)epoch * learning_decay_rate);
410         if (epoch >= speculation_start && epoch < (nPasses - 1)) {
411             // have to cancel the affected submaps
412             cancel_submaps(
413                 std::get<XV>(min_sr), std::get<YV>(min_sr), radius, (epoch + 1) % SPECULATION_CNT);
414             canceled_submaps = true;
415         }
416         map1.epoch_update(
417             in[j], epoch, std::get<1>(min_sr), std::get<2>(min_sr), radius, learning_rate);
418         ++global_i;
419         if (canceled_submaps) {
420             // do I have to wait for all the non-canceled speculative graph to complete first?
421             // yes, in case a canceled task was already executing.
422             wait_for_all_graphs((epoch + 1) % SPECULATION_CNT); // wait for the array of subgraphs
423             restart_submaps(std::get<1>(min_sr),
424                             std::get<2>(min_sr),
425                             radius,
426                             (epoch + 1) % SPECULATION_CNT,
427                             in[next_j]);
428         }
429 
430         last_update = min_sr;
431         std::get<RADIUS>(last_update) = radius; // not smallest value, but range of effect
432     }
433     destroy_BMU_graph();
434 }
435 
436 static const double serial_time_adjust = 1.25;
437 static double radius_fraction = 3.0;
438 
main(int argc,char * argv[])439 int main(int argc, char *argv[]) {
440     int l_speculation_start;
441     utility::thread_number_range threads(
442         utility::get_default_num_threads,
443         utility::
444             get_default_num_threads() // run only the default number of threads if none specified
445     );
446 
447     utility::parse_cli_arguments(
448         argc,
449         argv,
450         utility::cli_argument_pack()
451             //"-h" option for for displaying help is present implicitly
452             .positional_arg(
453                 threads,
454                 "n-of-threads",
455                 "number of threads to use; a range of the form low[:high], where low and optional high are non-negative integers or 'auto' for the TBB default.")
456             // .positional_arg(InputFileName,"input-file","input file name")
457             // .positional_arg(OutputFileName,"output-file","output file name")
458             .positional_arg(
459                 radius_fraction, "radius-fraction", "size of radius at which to start speculating")
460             .positional_arg(
461                 nPasses, "number-of-epochs", "number of examples used in learning phase")
462             .arg(cancel_test, "cancel-test", "test for cancel signal while finding BMU")
463             .arg(extra_debug, "debug", "additional output")
464             .arg(dont_speculate, "nospeculate", "don't speculate in SOM map teaching"));
465 
466     readInputData();
467     max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2;
468     // need this value for the 1x1 timing below
469     radius_decay_rate = -(log(1.0 / (double)max_radius) / (double)nPasses);
470     find_data_ranges(my_teaching, max_range, min_range);
471     if (extra_debug) {
472         printf("Data range: ");
473         remark_SOM_element(min_range);
474         printf(" to ");
475         remark_SOM_element(max_range);
476         printf("\n");
477     }
478 
479     // find how much time is taken for the single function_node case.
480     // adjust nPasses so the 1x1 time is somewhere around serial_time_adjust seconds.
481     // make sure the example test runs for at least 0.5 second.
482     for (;;) {
483         // Restrict max concurrency level via task_arena interface
484         oneapi::tbb::task_arena ta(1);
485         SOMap map1(xMax, yMax);
486         speculation_start = nPasses + 1; // Don't speculate
487 
488         xranges = 1;
489         yranges = 1;
490         map1.initialize(InitializeGradient, max_range, min_range);
491         oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
492         graph_teach(map1, my_teaching, ta);
493         oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
494         double nSeconds = (t1 - t0).seconds();
495         if (nSeconds < 0.5) {
496             xMax *= 2;
497             yMax *= 2;
498             continue;
499         }
500         double size_adjust = sqrt(serial_time_adjust / nSeconds);
501         xMax = (int)((double)xMax * size_adjust);
502         yMax = (int)((double)yMax * size_adjust);
503         max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2;
504         radius_decay_rate = log((double)max_radius) / (double)nPasses;
505 
506         if (extra_debug) {
507             printf("original 1x1 case ran in %g seconds\n", nSeconds);
508             printf("   Size of table == %d x %d\n", xMax, yMax);
509             printf("   radius_decay_rate == %g\n", radius_decay_rate);
510         }
511         break;
512     }
513 
514     // the "max_radius" starts at 1/2*radius_fraction the table size.  To start the speculation when the radius is
515     // 1 / n * the table size, the constant in the log below should be n / 2.  so 2 == 1/4, 3 == 1/6th,
516     // et c.
517     if (dont_speculate) {
518         l_speculation_start = nPasses + 1;
519         if (extra_debug)
520             printf("speculation will not be done\n");
521     }
522     else {
523         if (radius_fraction < 1.0) {
524             if (extra_debug)
525                 printf("Warning: radius_fraction should be >= 1.  Setting to 1.\n");
526             radius_fraction = 1.0;
527         }
528         l_speculation_start = (int)((double)nPasses * log(radius_fraction) / log((double)nPasses));
529         if (extra_debug)
530             printf("We will start speculation at iteration %d\n", l_speculation_start);
531     }
532     double single_time; // for speedup calculations
533 #if EXTRA_DEBUG
534     // storage for the single-subrange answers, for comparing maps
535     std::vector<double> single_dist;
536     single_dist.reserve(my_teaching.size());
537     std::vector<int> single_xval;
538     single_xval.reserve(my_teaching.size());
539     std::vector<int> single_yval;
540     single_yval.reserve(my_teaching.size());
541 #endif
542     //TODO: Investigate how to not require mandatory concurrency
543     for (int p = std::max(threads.first, 2); p <= std::max(threads.last, 2); ++p) {
544         // Restrict max concurrency level via task_arena interface
545         oneapi::tbb::global_control limit(oneapi::tbb::global_control::max_allowed_parallelism, p);
546         oneapi::tbb::task_arena ta(p);
547         if (extra_debug)
548             printf(" -------------- Running with %d threads. ------------\n", p);
549         // run the SOM build for a series of subranges
550         for (xranges = 1; xranges <= xRangeMax; ++xranges) {
551             for (yranges = xranges; yranges <= yRangeMax; ++yranges) {
552                 if (xranges == 1 && yranges == 1) {
553                     // don't pointlessly speculate if we're only running one subrange.
554                     speculation_start = nPasses + 1;
555                 }
556                 else {
557                     speculation_start = l_speculation_start;
558                 }
559                 SOMap map1(xMax, yMax);
560                 map1.initialize(InitializeGradient, max_range, min_range);
561 
562                 if (extra_debug)
563                     printf("Start learning for [%d,%d] ----------- \n", xranges, yranges);
564                 oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
565                 graph_teach(map1, my_teaching, ta);
566                 oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
567 
568                 if (extra_debug)
569                     printf("Done learning for [%d,%d], which took %g seconds ",
570                            xranges,
571                            yranges,
572                            (t1 - t0).seconds());
573                 if (xranges == 1 && yranges == 1)
574                     single_time = (t1 - t0).seconds();
575                 if (extra_debug)
576                     printf(": speedup == %g\n", single_time / (t1 - t0).seconds());
577 
578 #if EXTRA_DEBUG
579                 if (extra_debug) {
580                     // number of times cancel was called, indexed by number of subranges canceled
581                     for (int i = 0; i < cancel_count.size(); ++i) {
582                         // only write output if we have a non-zero value.
583                         if (cancel_count[i] > 0) {
584                             int totalcnt = 0;
585                             printf("     cancellations: ");
586                             for (int j = 0; j < cancel_count.size(); ++j) {
587                                 if (cancel_count[j]) {
588                                     printf(" %d [%d]", j, cancel_count[j]);
589                                     totalcnt += cancel_count[j];
590                                 }
591                             }
592                             totalcnt += speculation_start;
593                             printf(" for a total of %d\n", totalcnt);
594                             break; // from for
595                         }
596                     }
597 
598                     // number of extra results (these occur when the subrange task starts before
599                     // cancel is received.)
600                     for (int i = 0; i < extra_count.size(); ++i) {
601                         if (extra_count[i] > 0) {
602                             int totalcnt = 0;
603                             printf("extra computations: ");
604                             for (int j = 0; j < extra_count.size(); ++j) {
605                                 if (extra_count[j]) {
606                                     printf(" %d[%d]", j, extra_count[j]);
607                                     totalcnt += extra_count[j];
608                                 }
609                             }
610                             totalcnt += speculation_start;
611                             printf(" for a total of %d\n", totalcnt);
612                             break; // from for
613                         }
614                     }
615 
616                     // here we count the number of times we looked for a particular subrange when fetching
617                     // the queue_node output and didn't find anything.  This may occur when a function_node
618                     // is "stuck" and doesn't process some number of exemplars.  function_node_execs is
619                     // a count of the number of times the corresponding function_node was executed (in
620                     // case the problem is dropped output in the queue_node.)
621                     for (int i = 0; i < missing_count.size(); ++i) {
622                         if (missing_count[i]) {
623                             int xval = i / yranges;
624                             int yval = i % yranges;
625                             printf(" f_node[%d,%d] missed %d values", xval, yval, missing_count[i]);
626                             if (canceled_before[i]) {
627                                 printf(" canceled_before == %d", canceled_before[i]);
628                             }
629                             printf(", fn_tally == %d\n", function_node_execs[i]);
630                         }
631                     }
632                 }
633 
634                 // check that output matches the 1x1 case
635                 for (int i = 0; i < my_teaching.size(); ++i) {
636                     int xdist;
637                     int ydist;
638                     double my_dist = map1.BMU(my_teaching[i], xdist, ydist);
639                     if (xranges == 1 && yranges == 1) {
640                         single_dist.push_back(my_dist);
641                         single_xval.push_back(xdist);
642                         single_yval.push_back(ydist);
643                     }
644                     else {
645                         if (single_dist[i] != my_dist || single_xval[i] != xdist ||
646                             single_yval[i] != ydist)
647                             printf(
648                                 "Error in output: expecting <%g, %d, %d>, but got <%g, %d, %d>\n",
649                                 single_dist[i],
650                                 single_xval[i],
651                                 single_yval[i],
652                                 my_dist,
653                                 xdist,
654                                 ydist);
655                     }
656                 }
657 #endif
658             } // yranges
659         } // xranges
660     } // #threads p
661     printf("done\n");
662     return 0;
663 }
664