1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 //
18 // Self-organizing map in TBB flow::graph
19 //
20 // This is an example of the use of cancellation in a graph. After a point in searching for
21 // the best match for an example, two examples are looked for simultaneously. When the
22 // earlier example is found and the update radius is determined, the affected searches
23 // for the subsequent example are cancelled, and after the update they are restarted.
24 // As the update radius shrinks fewer searches are cancelled, and by the last iterations
25 // virtually all the work done for the speculating example is useful.
26 //
27 // first, a simple implementation with only one example vector
28 // at a time.
29 //
30 // we will do a color map (the simple example.)
31 //
32 // graph algorithm
33 //
34 // for some number of iterations
35 // update radius r, weight of change L
36 // for each example V
37 // use graph to find BMU
38 // for each part of map within radius of BMU W
39 // update vector: W(t+1) = W(t) + w(dist)*L*(V - W(t))
40
41 #ifndef NOMINMAX
42 #define NOMINMAX
43 #endif // NOMINMAX
44
45 #include <algorithm>
46
47 #define _MAIN_C_ 1
48 #include "som.hpp"
49
50 #include "oneapi/tbb/flow_graph.h"
51 #include "oneapi/tbb/blocked_range2d.h"
52 #include "oneapi/tbb/tick_count.h"
53 #include "oneapi/tbb/task_arena.h"
54 #include "oneapi/tbb/global_control.h"
55
56 #include "common/utility/utility.hpp"
57 #include "common/utility/get_default_num_threads.hpp"
58
59 #define RED 0
60 #define GREEN 1
61 #define BLUE 2
62
63 static int xranges = 1;
64 static int yranges = 1;
65 static int xsize = -1;
66 static int ysize = -1;
67
68 static int global_i = 0;
69 static int speculation_start;
70
71 #if EXTRA_DEBUG
72 std::vector<int> cancel_count;
73 std::vector<int> extra_count;
74 std::vector<int> missing_count;
75 std::vector<int> canceled_before;
76 #endif
77 std::vector<int> function_node_execs;
78 static int xRangeMax = 3;
79 static int yRangeMax = 3;
80 static bool dont_speculate = false;
81 static search_result_type last_update;
82
83 class BMU_search_body {
84 SOMap &my_map;
85 subsquare_type my_square;
86 int &fn_tally;
87
88 public:
BMU_search_body(SOMap & _m,subsquare_type & _sq,int & fnt)89 BMU_search_body(SOMap &_m, subsquare_type &_sq, int &fnt)
90 : my_map(_m),
91 my_square(_sq),
92 fn_tally(fnt) {}
BMU_search_body(const BMU_search_body & other)93 BMU_search_body(const BMU_search_body &other)
94 : my_map(other.my_map),
95 my_square(other.my_square),
96 fn_tally(other.fn_tally) {}
operator ()(const SOM_element s)97 search_result_type operator()(const SOM_element s) {
98 int my_x;
99 int my_y;
100 double min_dist = my_map.BMU_range(s, my_x, my_y, my_square);
101 ++fn_tally; // count how many times this function_node executed
102 return search_result_type(min_dist, my_x, my_y);
103 }
104 };
105
106 typedef oneapi::tbb::flow::function_node<SOM_element, search_result_type> search_node;
107 typedef oneapi::tbb::flow::broadcast_node<SOM_element> b_node;
108 typedef std::vector<search_node *> search_node_vector_type;
109 typedef std::vector<search_node_vector_type> search_node_array_type;
110 typedef std::vector<oneapi::tbb::flow::graph *> graph_vector_type;
111 typedef std::vector<graph_vector_type> graph_array_type;
112
113 #define SPECULATION_CNT 2
114
115 oneapi::tbb::flow::graph *g[SPECULATION_CNT]; // main graph; there should only be one per epoch
116 b_node *send_to[SPECULATION_CNT]; // broadcast node to send exemplar to all function_nodes
117 oneapi::tbb::flow::queue_node<search_result_type>
118 *q[SPECULATION_CNT]; // queue for function nodes to put their results in
119 // each function_node should have its own graph
120 search_node_array_type *s_array[SPECULATION_CNT]; // 2d array of function nodes
121 graph_array_type *g_array[SPECULATION_CNT]; // 2d array of graphs
122
123 // All graphs must locate in the same arena.
construct_graph(oneapi::tbb::task_arena & ta)124 oneapi::tbb::flow::graph *construct_graph(oneapi::tbb::task_arena &ta) {
125 oneapi::tbb::flow::graph *result;
126 ta.execute([&result] {
127 result = new oneapi::tbb::flow::graph();
128 });
129 return result;
130 }
131
132 // build a set of SPECULATION_CNT graphs, each of which consists of a broadcast_node,
133 // xranges x yranges function_nodes, and one queue_node for output.
134 // once speculation starts, if i % SPECULATION_CNT is the current graph, (i+1) % SPECULATION_CNT
135 // is the first speculation, and so on.
build_BMU_graph(SOMap & map1,oneapi::tbb::task_arena & ta)136 void build_BMU_graph(SOMap &map1, oneapi::tbb::task_arena &ta) {
137 // build current graph
138 xsize = ((int)map1.size() + xranges - 1) / xranges;
139 ysize = ((int)map1[0].size() + yranges - 1) / yranges;
140 function_node_execs.clear();
141 function_node_execs.reserve(xranges * yranges + 1);
142 for (int i = 0; i < xranges * yranges + 1; ++i)
143 function_node_execs.push_back(0);
144
145 for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) {
146 g[scnt] = construct_graph(ta);
147 send_to[scnt] = new b_node(*(g[scnt])); // broadcast node to the function_nodes
148 q[scnt] = new oneapi::tbb::flow::queue_node<search_result_type>(*(g[scnt])); // output queue
149
150 // create the function_nodes, tie to the graph
151 s_array[scnt] = new search_node_array_type;
152 s_array[scnt]->reserve(xranges);
153 g_array[scnt] = new graph_array_type;
154 g_array[scnt]->reserve(xranges);
155 for (int i = 0; i < (int)map1.size(); i += xsize) {
156 int xindex = i / xsize;
157 s_array[scnt]->push_back(search_node_vector_type());
158 #if EXTRA_DEBUG
159 if (s_array[scnt]->size() != xindex + 1) {
160 printf("Error; s_array[%d]->size() == %d, xindex== %d\n",
161 scnt,
162 (int)(s_array[scnt]->size()),
163 xindex);
164 }
165 #endif
166 (*s_array[scnt])[xindex].reserve(yranges);
167 g_array[scnt]->push_back(graph_vector_type());
168 (*g_array[scnt])[xindex].reserve(yranges);
169 for (int j = 0; j < (int)map1[0].size(); j += ysize) {
170 int offset = (i / xsize) * yranges + (j / ysize);
171 int xmax = (i + xsize) > (int)map1.size() ? (int)map1.size() : i + xsize;
172 int ymax = (j + ysize) > (int)map1[0].size() ? (int)map1[0].size() : j + ysize;
173 subsquare_type sst(i, xmax, 1, j, ymax, 1);
174 BMU_search_body bb(map1, sst, function_node_execs[offset]);
175 oneapi::tbb::flow::graph *g_local = construct_graph(ta);
176 search_node *s =
177 new search_node(*g_local, oneapi::tbb::flow::serial, bb); // copies Body
178 (*g_array[scnt])[xindex].push_back(g_local);
179 (*s_array[scnt])[xindex].push_back(s);
180 oneapi::tbb::flow::make_edge(*(send_to[scnt]),
181 *s); // broadcast_node -> function_node
182 oneapi::tbb::flow::make_edge(*s, *(q[scnt])); // function_node -> queue_node
183 }
184 }
185 }
186 }
187
188 // Wait for the 2D array of flow::graphs.
wait_for_all_graphs(int cIndex)189 void wait_for_all_graphs(int cIndex) { // cIndex ranges over [0 .. SPECULATION_CNT - 1]
190 for (int x = 0; x < xranges; ++x) {
191 for (int y = 0; y < yranges; ++y) {
192 (*g_array[cIndex])[x][y]->wait_for_all();
193 }
194 }
195 }
196
destroy_BMU_graph()197 void destroy_BMU_graph() {
198 for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) {
199 for (int i = 0; i < (int)(*s_array[scnt]).size(); ++i) {
200 for (int j = 0; j < (int)(*s_array[scnt])[i].size(); ++j) {
201 delete (*s_array[scnt])[i][j];
202 delete (*g_array[scnt])[i][j];
203 }
204 }
205 (*s_array[scnt]).clear();
206 delete s_array[scnt];
207 (*g_array[scnt]).clear();
208 delete g_array[scnt];
209 delete q[scnt];
210 delete send_to[scnt];
211 delete g[scnt];
212 }
213 }
214
find_subrange_overlap(int const & xval,int const & yval,double const & radius,int & xlow,int & xhigh,int & ylow,int & yhigh)215 void find_subrange_overlap(int const &xval,
216 int const &yval,
217 double const &radius,
218 int &xlow,
219 int &xhigh,
220 int &ylow,
221 int &yhigh) {
222 xlow = int((xval - radius) / xsize);
223 xhigh = int((xval + radius) / xsize);
224 ylow = int((yval - radius) / ysize);
225 yhigh = int((yval + radius) / ysize);
226 // circle may fall partly outside map
227 if (xlow < 0)
228 xlow = 0;
229 if (xhigh >= xranges)
230 xhigh = xranges - 1;
231 if (ylow < 0)
232 ylow = 0;
233 if (yhigh >= yranges)
234 yhigh = yranges - 1;
235 #if EXTRA_DEBUG
236 if (xlow >= xranges)
237 printf(" Error *** xlow == %d\n", xlow);
238 if (xhigh < 0)
239 printf("Error *** xhigh == %d\n", xhigh);
240 if (ylow >= yranges)
241 printf("Error *** ylow == %d\n", ylow);
242 if (yhigh < 0)
243 printf("Error *** yhigh == %d\n", yhigh);
244 #endif
245 }
246
overlap(int & xval,int & yval,search_result_type & sr)247 bool overlap(int &xval, int &yval, search_result_type &sr) {
248 int xlow, xhigh, ylow, yhigh;
249 find_subrange_overlap(
250 std::get<XV>(sr), std::get<YV>(sr), std::get<RADIUS>(sr), xlow, xhigh, ylow, yhigh);
251 return xval >= xlow && xval <= xhigh && yval >= ylow && yval <= yhigh;
252 }
253
cancel_submaps(int & xval,int & yval,double & radius,int indx)254 void cancel_submaps(int &xval, int &yval, double &radius, int indx) {
255 int xlow;
256 int xhigh;
257 int ylow;
258 int yhigh;
259 find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh);
260 for (int x = xlow; x <= xhigh; ++x) {
261 for (int y = ylow; y <= yhigh; ++y) {
262 (*g_array[indx])[x][y]->cancel();
263 }
264 }
265 #if EXTRA_DEBUG
266 ++cancel_count[(xhigh - xlow + 1) * (yhigh - ylow + 1)];
267 #endif
268 }
269
restart_submaps(int & xval,int & yval,double & radius,int indx,SOM_element & vector)270 void restart_submaps(int &xval, int &yval, double &radius, int indx, SOM_element &vector) {
271 int xlow;
272 int xhigh;
273 int ylow;
274 int yhigh;
275 find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh);
276 for (int x = xlow; x <= xhigh; ++x) {
277 for (int y = ylow; y <= yhigh; ++y) {
278 // have to reset the graph
279 (*g_array[indx])[x][y]->reset();
280 // and re-submit the exemplar for search.
281 (*s_array[indx])[x][y]->try_put(vector);
282 }
283 }
284 }
285
graph_BMU(int indx)286 search_result_type graph_BMU(int indx) { // indx ranges over [0 .. SPECULATION_CNT -1]
287 wait_for_all_graphs(indx); // wait for the array of subgraphs
288 (g[indx])->wait_for_all();
289 std::vector<search_result_type> all_srs(xRangeMax * yRangeMax,
290 search_result_type(DBL_MAX, -1, -1));
291 #if EXTRA_DEBUG
292 int extra_computations = 0;
293 #endif
294 search_result_type sr;
295 search_result_type min_sr;
296 std::get<RADIUS>(min_sr) = DBL_MAX;
297 int result_count = 0;
298 while ((q[indx])->try_get(sr)) {
299 ++result_count;
300 // figure which submap this came from
301 int x = std::get<XV>(sr) / xsize;
302 int y = std::get<YV>(sr) / ysize;
303 #if EXTRA_DEBUG
304 if (x < 0 || x >= xranges)
305 printf(" ### x value out of range (%d)\n", x);
306 if (y < 0 || y >= yranges)
307 printf(" ### y value out of range (%d)\n", y);
308 #endif
309 int offset = x * yranges + y; // linearized subscript
310 #if EXTRA_DEBUG
311 if (std::get<RADIUS>(all_srs[offset]) !=
312 DBL_MAX) { // we've already got a result from this subsquare
313 ++extra_computations;
314 }
315 else if (std::get<XV>(all_srs[offset]) != -1) {
316 if (extra_debug)
317 printf("More than one cancellation of [%d,%d] iteration %d\n", x, y, global_i);
318 }
319 #endif
320 all_srs[offset] = sr;
321 if (std::get<RADIUS>(sr) < std::get<RADIUS>(min_sr))
322 min_sr = sr;
323 else if (std::get<RADIUS>(sr) == std::get<RADIUS>(min_sr)) {
324 if (std::get<XV>(sr) < std::get<XV>(min_sr)) {
325 min_sr = sr;
326 }
327 else if ((std::get<XV>(sr) == std::get<XV>(min_sr) &&
328 std::get<YV>(sr) < std::get<YV>(min_sr))) {
329 min_sr = sr;
330 }
331 }
332 }
333 #if EXTRA_DEBUG
334 if (result_count != xranges * yranges + extra_computations) {
335 // we are missing at least one of the expected results. Tally the missing values
336 for (int i = 0; i < xranges * yranges; ++i) {
337 if (std::get<RADIUS>(all_srs[i]) == DBL_MAX) {
338 // i == x*yranges + y
339 int xval = i / yranges;
340 int yval = i % yranges;
341 bool received_cancel_result = std::get<XV>(all_srs[i]) != -1;
342 if (overlap(xval, yval, last_update)) {
343 // we have previously canceled this subsquare.
344 printf("No result for [%d,%d] which was canceled(%s)\n",
345 xval,
346 yval,
347 received_cancel_result ? "T" : "F");
348 ++canceled_before[i];
349 }
350 else {
351 printf("No result for [%d,%d] which was not canceled(%s)\n",
352 xval,
353 yval,
354 received_cancel_result ? "T" : "F");
355 }
356 ++missing_count[i];
357 }
358 }
359 }
360 if (extra_computations)
361 ++extra_count[extra_computations];
362 #endif
363 return min_sr;
364 // end of one epoch
365 }
366
graph_teach(SOMap & map1,teaching_vector_type & in,oneapi::tbb::task_arena & ta)367 void graph_teach(SOMap &map1, teaching_vector_type &in, oneapi::tbb::task_arena &ta) {
368 build_BMU_graph(map1, ta);
369 #if EXTRA_DEBUG
370 cancel_count.clear();
371 extra_count.clear();
372 missing_count.clear();
373 canceled_before.clear();
374 cancel_count.reserve(xRangeMax * yRangeMax + 1);
375 extra_count.reserve(xRangeMax * yRangeMax + 1);
376 missing_count.reserve(xRangeMax * yRangeMax + 1);
377 canceled_before.reserve(xRangeMax * yRangeMax + 1);
378 for (int i = 0; i < xRangeMax * yRangeMax + 1; ++i) {
379 cancel_count.push_back(0);
380 extra_count.push_back(0);
381 missing_count.push_back(0);
382 canceled_before.push_back(0);
383 }
384 #endif
385 // normally the training would pick random exemplars to teach the SOM. We need
386 // the process to be reproducible, so we will pick the exemplars in order, [0, in.size())
387 int next_j = 0;
388 for (int epoch = 0; epoch < nPasses; ++epoch) {
389 global_i = epoch;
390 bool canceled_submaps = false;
391 int j = next_j; // try to make reproducible
392 next_j = (epoch + 1) % in.size();
393 search_result_type min_sr;
394 if (epoch < speculation_start) {
395 (send_to[epoch % SPECULATION_CNT])->try_put(in[j]);
396 }
397 else if (epoch == speculation_start) {
398 (send_to[epoch % SPECULATION_CNT])->try_put(in[j]);
399 if (epoch < nPasses - 1) {
400 (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]);
401 }
402 }
403 else if (epoch < nPasses - 1) {
404 (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]);
405 }
406 min_sr = graph_BMU(epoch % SPECULATION_CNT); //calls wait_for_all()
407 double min_distance = std::get<0>(min_sr);
408 double radius = max_radius * exp(-(double)epoch * radius_decay_rate);
409 double learning_rate = max_learning_rate * exp(-(double)epoch * learning_decay_rate);
410 if (epoch >= speculation_start && epoch < (nPasses - 1)) {
411 // have to cancel the affected submaps
412 cancel_submaps(
413 std::get<XV>(min_sr), std::get<YV>(min_sr), radius, (epoch + 1) % SPECULATION_CNT);
414 canceled_submaps = true;
415 }
416 map1.epoch_update(
417 in[j], epoch, std::get<1>(min_sr), std::get<2>(min_sr), radius, learning_rate);
418 ++global_i;
419 if (canceled_submaps) {
420 // do I have to wait for all the non-canceled speculative graph to complete first?
421 // yes, in case a canceled task was already executing.
422 wait_for_all_graphs((epoch + 1) % SPECULATION_CNT); // wait for the array of subgraphs
423 restart_submaps(std::get<1>(min_sr),
424 std::get<2>(min_sr),
425 radius,
426 (epoch + 1) % SPECULATION_CNT,
427 in[next_j]);
428 }
429
430 last_update = min_sr;
431 std::get<RADIUS>(last_update) = radius; // not smallest value, but range of effect
432 }
433 destroy_BMU_graph();
434 }
435
436 static const double serial_time_adjust = 1.25;
437 static double radius_fraction = 3.0;
438
main(int argc,char * argv[])439 int main(int argc, char *argv[]) {
440 int l_speculation_start;
441 utility::thread_number_range threads(
442 utility::get_default_num_threads,
443 utility::
444 get_default_num_threads() // run only the default number of threads if none specified
445 );
446
447 utility::parse_cli_arguments(
448 argc,
449 argv,
450 utility::cli_argument_pack()
451 //"-h" option for for displaying help is present implicitly
452 .positional_arg(
453 threads,
454 "n-of-threads",
455 "number of threads to use; a range of the form low[:high], where low and optional high are non-negative integers or 'auto' for the TBB default.")
456 // .positional_arg(InputFileName,"input-file","input file name")
457 // .positional_arg(OutputFileName,"output-file","output file name")
458 .positional_arg(
459 radius_fraction, "radius-fraction", "size of radius at which to start speculating")
460 .positional_arg(
461 nPasses, "number-of-epochs", "number of examples used in learning phase")
462 .arg(cancel_test, "cancel-test", "test for cancel signal while finding BMU")
463 .arg(extra_debug, "debug", "additional output")
464 .arg(dont_speculate, "nospeculate", "don't speculate in SOM map teaching"));
465
466 readInputData();
467 max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2;
468 // need this value for the 1x1 timing below
469 radius_decay_rate = -(log(1.0 / (double)max_radius) / (double)nPasses);
470 find_data_ranges(my_teaching, max_range, min_range);
471 if (extra_debug) {
472 printf("Data range: ");
473 remark_SOM_element(min_range);
474 printf(" to ");
475 remark_SOM_element(max_range);
476 printf("\n");
477 }
478
479 // find how much time is taken for the single function_node case.
480 // adjust nPasses so the 1x1 time is somewhere around serial_time_adjust seconds.
481 // make sure the example test runs for at least 0.5 second.
482 for (;;) {
483 // Restrict max concurrency level via task_arena interface
484 oneapi::tbb::task_arena ta(1);
485 SOMap map1(xMax, yMax);
486 speculation_start = nPasses + 1; // Don't speculate
487
488 xranges = 1;
489 yranges = 1;
490 map1.initialize(InitializeGradient, max_range, min_range);
491 oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
492 graph_teach(map1, my_teaching, ta);
493 oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
494 double nSeconds = (t1 - t0).seconds();
495 if (nSeconds < 0.5) {
496 xMax *= 2;
497 yMax *= 2;
498 continue;
499 }
500 double size_adjust = sqrt(serial_time_adjust / nSeconds);
501 xMax = (int)((double)xMax * size_adjust);
502 yMax = (int)((double)yMax * size_adjust);
503 max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2;
504 radius_decay_rate = log((double)max_radius) / (double)nPasses;
505
506 if (extra_debug) {
507 printf("original 1x1 case ran in %g seconds\n", nSeconds);
508 printf(" Size of table == %d x %d\n", xMax, yMax);
509 printf(" radius_decay_rate == %g\n", radius_decay_rate);
510 }
511 break;
512 }
513
514 // the "max_radius" starts at 1/2*radius_fraction the table size. To start the speculation when the radius is
515 // 1 / n * the table size, the constant in the log below should be n / 2. so 2 == 1/4, 3 == 1/6th,
516 // et c.
517 if (dont_speculate) {
518 l_speculation_start = nPasses + 1;
519 if (extra_debug)
520 printf("speculation will not be done\n");
521 }
522 else {
523 if (radius_fraction < 1.0) {
524 if (extra_debug)
525 printf("Warning: radius_fraction should be >= 1. Setting to 1.\n");
526 radius_fraction = 1.0;
527 }
528 l_speculation_start = (int)((double)nPasses * log(radius_fraction) / log((double)nPasses));
529 if (extra_debug)
530 printf("We will start speculation at iteration %d\n", l_speculation_start);
531 }
532 double single_time; // for speedup calculations
533 #if EXTRA_DEBUG
534 // storage for the single-subrange answers, for comparing maps
535 std::vector<double> single_dist;
536 single_dist.reserve(my_teaching.size());
537 std::vector<int> single_xval;
538 single_xval.reserve(my_teaching.size());
539 std::vector<int> single_yval;
540 single_yval.reserve(my_teaching.size());
541 #endif
542 //TODO: Investigate how to not require mandatory concurrency
543 for (int p = std::max(threads.first, 2); p <= std::max(threads.last, 2); ++p) {
544 // Restrict max concurrency level via task_arena interface
545 oneapi::tbb::global_control limit(oneapi::tbb::global_control::max_allowed_parallelism, p);
546 oneapi::tbb::task_arena ta(p);
547 if (extra_debug)
548 printf(" -------------- Running with %d threads. ------------\n", p);
549 // run the SOM build for a series of subranges
550 for (xranges = 1; xranges <= xRangeMax; ++xranges) {
551 for (yranges = xranges; yranges <= yRangeMax; ++yranges) {
552 if (xranges == 1 && yranges == 1) {
553 // don't pointlessly speculate if we're only running one subrange.
554 speculation_start = nPasses + 1;
555 }
556 else {
557 speculation_start = l_speculation_start;
558 }
559 SOMap map1(xMax, yMax);
560 map1.initialize(InitializeGradient, max_range, min_range);
561
562 if (extra_debug)
563 printf("Start learning for [%d,%d] ----------- \n", xranges, yranges);
564 oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
565 graph_teach(map1, my_teaching, ta);
566 oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
567
568 if (extra_debug)
569 printf("Done learning for [%d,%d], which took %g seconds ",
570 xranges,
571 yranges,
572 (t1 - t0).seconds());
573 if (xranges == 1 && yranges == 1)
574 single_time = (t1 - t0).seconds();
575 if (extra_debug)
576 printf(": speedup == %g\n", single_time / (t1 - t0).seconds());
577
578 #if EXTRA_DEBUG
579 if (extra_debug) {
580 // number of times cancel was called, indexed by number of subranges canceled
581 for (int i = 0; i < cancel_count.size(); ++i) {
582 // only write output if we have a non-zero value.
583 if (cancel_count[i] > 0) {
584 int totalcnt = 0;
585 printf(" cancellations: ");
586 for (int j = 0; j < cancel_count.size(); ++j) {
587 if (cancel_count[j]) {
588 printf(" %d [%d]", j, cancel_count[j]);
589 totalcnt += cancel_count[j];
590 }
591 }
592 totalcnt += speculation_start;
593 printf(" for a total of %d\n", totalcnt);
594 break; // from for
595 }
596 }
597
598 // number of extra results (these occur when the subrange task starts before
599 // cancel is received.)
600 for (int i = 0; i < extra_count.size(); ++i) {
601 if (extra_count[i] > 0) {
602 int totalcnt = 0;
603 printf("extra computations: ");
604 for (int j = 0; j < extra_count.size(); ++j) {
605 if (extra_count[j]) {
606 printf(" %d[%d]", j, extra_count[j]);
607 totalcnt += extra_count[j];
608 }
609 }
610 totalcnt += speculation_start;
611 printf(" for a total of %d\n", totalcnt);
612 break; // from for
613 }
614 }
615
616 // here we count the number of times we looked for a particular subrange when fetching
617 // the queue_node output and didn't find anything. This may occur when a function_node
618 // is "stuck" and doesn't process some number of exemplars. function_node_execs is
619 // a count of the number of times the corresponding function_node was executed (in
620 // case the problem is dropped output in the queue_node.)
621 for (int i = 0; i < missing_count.size(); ++i) {
622 if (missing_count[i]) {
623 int xval = i / yranges;
624 int yval = i % yranges;
625 printf(" f_node[%d,%d] missed %d values", xval, yval, missing_count[i]);
626 if (canceled_before[i]) {
627 printf(" canceled_before == %d", canceled_before[i]);
628 }
629 printf(", fn_tally == %d\n", function_node_execs[i]);
630 }
631 }
632 }
633
634 // check that output matches the 1x1 case
635 for (int i = 0; i < my_teaching.size(); ++i) {
636 int xdist;
637 int ydist;
638 double my_dist = map1.BMU(my_teaching[i], xdist, ydist);
639 if (xranges == 1 && yranges == 1) {
640 single_dist.push_back(my_dist);
641 single_xval.push_back(xdist);
642 single_yval.push_back(ydist);
643 }
644 else {
645 if (single_dist[i] != my_dist || single_xval[i] != xdist ||
646 single_yval[i] != ydist)
647 printf(
648 "Error in output: expecting <%g, %d, %d>, but got <%g, %d, %d>\n",
649 single_dist[i],
650 single_xval[i],
651 single_yval[i],
652 my_dist,
653 xdist,
654 ydist);
655 }
656 }
657 #endif
658 } // yranges
659 } // xranges
660 } // #threads p
661 printf("done\n");
662 return 0;
663 }
664