1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // 18 // Self-organizing map in TBB flow::graph 19 // 20 // This is an example of the use of cancellation in a graph. After a point in searching for 21 // the best match for an example, two examples are looked for simultaneously. When the 22 // earlier example is found and the update radius is determined, the affected searches 23 // for the subsequent example are cancelled, and after the update they are restarted. 24 // As the update radius shrinks fewer searches are cancelled, and by the last iterations 25 // virtually all the work done for the speculating example is useful. 26 // 27 // first, a simple implementation with only one example vector 28 // at a time. 29 // 30 // we will do a color map (the simple example.) 31 // 32 // graph algorithm 33 // 34 // for some number of iterations 35 // update radius r, weight of change L 36 // for each example V 37 // use graph to find BMU 38 // for each part of map within radius of BMU W 39 // update vector: W(t+1) = W(t) + w(dist)*L*(V - W(t)) 40 41 #ifndef NOMINMAX 42 #define NOMINMAX 43 #endif // NOMINMAX 44 45 #include <algorithm> 46 47 #define _MAIN_C_ 1 48 #include "som.hpp" 49 50 #include "oneapi/tbb/flow_graph.h" 51 #include "oneapi/tbb/blocked_range2d.h" 52 #include "oneapi/tbb/tick_count.h" 53 #include "oneapi/tbb/task_arena.h" 54 #include "oneapi/tbb/global_control.h" 55 56 #include "common/utility/utility.hpp" 57 #include "common/utility/get_default_num_threads.hpp" 58 59 #define RED 0 60 #define GREEN 1 61 #define BLUE 2 62 63 static int xranges = 1; 64 static int yranges = 1; 65 static int xsize = -1; 66 static int ysize = -1; 67 68 static int global_i = 0; 69 static int speculation_start; 70 71 #if EXTRA_DEBUG 72 std::vector<int> cancel_count; 73 std::vector<int> extra_count; 74 std::vector<int> missing_count; 75 std::vector<int> canceled_before; 76 #endif 77 std::vector<int> function_node_execs; 78 static int xRangeMax = 3; 79 static int yRangeMax = 3; 80 static bool dont_speculate = false; 81 static search_result_type last_update; 82 83 class BMU_search_body { 84 SOMap &my_map; 85 subsquare_type my_square; 86 int &fn_tally; 87 88 public: 89 BMU_search_body(SOMap &_m, subsquare_type &_sq, int &fnt) 90 : my_map(_m), 91 my_square(_sq), 92 fn_tally(fnt) {} 93 BMU_search_body(const BMU_search_body &other) 94 : my_map(other.my_map), 95 my_square(other.my_square), 96 fn_tally(other.fn_tally) {} 97 search_result_type operator()(const SOM_element s) { 98 int my_x; 99 int my_y; 100 double min_dist = my_map.BMU_range(s, my_x, my_y, my_square); 101 ++fn_tally; // count how many times this function_node executed 102 return search_result_type(min_dist, my_x, my_y); 103 } 104 }; 105 106 typedef oneapi::tbb::flow::function_node<SOM_element, search_result_type> search_node; 107 typedef oneapi::tbb::flow::broadcast_node<SOM_element> b_node; 108 typedef std::vector<search_node *> search_node_vector_type; 109 typedef std::vector<search_node_vector_type> search_node_array_type; 110 typedef std::vector<oneapi::tbb::flow::graph *> graph_vector_type; 111 typedef std::vector<graph_vector_type> graph_array_type; 112 113 #define SPECULATION_CNT 2 114 115 oneapi::tbb::flow::graph *g[SPECULATION_CNT]; // main graph; there should only be one per epoch 116 b_node *send_to[SPECULATION_CNT]; // broadcast node to send exemplar to all function_nodes 117 oneapi::tbb::flow::queue_node<search_result_type> 118 *q[SPECULATION_CNT]; // queue for function nodes to put their results in 119 // each function_node should have its own graph 120 search_node_array_type *s_array[SPECULATION_CNT]; // 2d array of function nodes 121 graph_array_type *g_array[SPECULATION_CNT]; // 2d array of graphs 122 123 // All graphs must locate in the same arena. 124 oneapi::tbb::flow::graph *construct_graph(oneapi::tbb::task_arena &ta) { 125 oneapi::tbb::flow::graph *result; 126 ta.execute([&result] { 127 result = new oneapi::tbb::flow::graph(); 128 }); 129 return result; 130 } 131 132 // build a set of SPECULATION_CNT graphs, each of which consists of a broadcast_node, 133 // xranges x yranges function_nodes, and one queue_node for output. 134 // once speculation starts, if i % SPECULATION_CNT is the current graph, (i+1) % SPECULATION_CNT 135 // is the first speculation, and so on. 136 void build_BMU_graph(SOMap &map1, oneapi::tbb::task_arena &ta) { 137 // build current graph 138 xsize = ((int)map1.size() + xranges - 1) / xranges; 139 ysize = ((int)map1[0].size() + yranges - 1) / yranges; 140 function_node_execs.clear(); 141 function_node_execs.reserve(xranges * yranges + 1); 142 for (int i = 0; i < xranges * yranges + 1; ++i) 143 function_node_execs.push_back(0); 144 145 for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) { 146 g[scnt] = construct_graph(ta); 147 send_to[scnt] = new b_node(*(g[scnt])); // broadcast node to the function_nodes 148 q[scnt] = new oneapi::tbb::flow::queue_node<search_result_type>(*(g[scnt])); // output queue 149 150 // create the function_nodes, tie to the graph 151 s_array[scnt] = new search_node_array_type; 152 s_array[scnt]->reserve(xranges); 153 g_array[scnt] = new graph_array_type; 154 g_array[scnt]->reserve(xranges); 155 for (int i = 0; i < (int)map1.size(); i += xsize) { 156 int xindex = i / xsize; 157 s_array[scnt]->push_back(search_node_vector_type()); 158 #if EXTRA_DEBUG 159 if (s_array[scnt]->size() != xindex + 1) { 160 printf("Error; s_array[%d]->size() == %d, xindex== %d\n", 161 scnt, 162 (int)(s_array[scnt]->size()), 163 xindex); 164 } 165 #endif 166 (*s_array[scnt])[xindex].reserve(yranges); 167 g_array[scnt]->push_back(graph_vector_type()); 168 (*g_array[scnt])[xindex].reserve(yranges); 169 for (int j = 0; j < (int)map1[0].size(); j += ysize) { 170 int offset = (i / xsize) * yranges + (j / ysize); 171 int xmax = (i + xsize) > (int)map1.size() ? (int)map1.size() : i + xsize; 172 int ymax = (j + ysize) > (int)map1[0].size() ? (int)map1[0].size() : j + ysize; 173 subsquare_type sst(i, xmax, 1, j, ymax, 1); 174 BMU_search_body bb(map1, sst, function_node_execs[offset]); 175 oneapi::tbb::flow::graph *g_local = construct_graph(ta); 176 search_node *s = 177 new search_node(*g_local, oneapi::tbb::flow::serial, bb); // copies Body 178 (*g_array[scnt])[xindex].push_back(g_local); 179 (*s_array[scnt])[xindex].push_back(s); 180 oneapi::tbb::flow::make_edge(*(send_to[scnt]), 181 *s); // broadcast_node -> function_node 182 oneapi::tbb::flow::make_edge(*s, *(q[scnt])); // function_node -> queue_node 183 } 184 } 185 } 186 } 187 188 // Wait for the 2D array of flow::graphs. 189 void wait_for_all_graphs(int cIndex) { // cIndex ranges over [0 .. SPECULATION_CNT - 1] 190 for (int x = 0; x < xranges; ++x) { 191 for (int y = 0; y < yranges; ++y) { 192 (*g_array[cIndex])[x][y]->wait_for_all(); 193 } 194 } 195 } 196 197 void destroy_BMU_graph() { 198 for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) { 199 for (int i = 0; i < (int)(*s_array[scnt]).size(); ++i) { 200 for (int j = 0; j < (int)(*s_array[scnt])[i].size(); ++j) { 201 delete (*s_array[scnt])[i][j]; 202 delete (*g_array[scnt])[i][j]; 203 } 204 } 205 (*s_array[scnt]).clear(); 206 delete s_array[scnt]; 207 (*g_array[scnt]).clear(); 208 delete g_array[scnt]; 209 delete q[scnt]; 210 delete send_to[scnt]; 211 delete g[scnt]; 212 } 213 } 214 215 void find_subrange_overlap(int const &xval, 216 int const &yval, 217 double const &radius, 218 int &xlow, 219 int &xhigh, 220 int &ylow, 221 int &yhigh) { 222 xlow = int((xval - radius) / xsize); 223 xhigh = int((xval + radius) / xsize); 224 ylow = int((yval - radius) / ysize); 225 yhigh = int((yval + radius) / ysize); 226 // circle may fall partly outside map 227 if (xlow < 0) 228 xlow = 0; 229 if (xhigh >= xranges) 230 xhigh = xranges - 1; 231 if (ylow < 0) 232 ylow = 0; 233 if (yhigh >= yranges) 234 yhigh = yranges - 1; 235 #if EXTRA_DEBUG 236 if (xlow >= xranges) 237 printf(" Error *** xlow == %d\n", xlow); 238 if (xhigh < 0) 239 printf("Error *** xhigh == %d\n", xhigh); 240 if (ylow >= yranges) 241 printf("Error *** ylow == %d\n", ylow); 242 if (yhigh < 0) 243 printf("Error *** yhigh == %d\n", yhigh); 244 #endif 245 } 246 247 bool overlap(int &xval, int &yval, search_result_type &sr) { 248 int xlow, xhigh, ylow, yhigh; 249 find_subrange_overlap( 250 std::get<XV>(sr), std::get<YV>(sr), std::get<RADIUS>(sr), xlow, xhigh, ylow, yhigh); 251 return xval >= xlow && xval <= xhigh && yval >= ylow && yval <= yhigh; 252 } 253 254 void cancel_submaps(int &xval, int &yval, double &radius, int indx) { 255 int xlow; 256 int xhigh; 257 int ylow; 258 int yhigh; 259 find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh); 260 for (int x = xlow; x <= xhigh; ++x) { 261 for (int y = ylow; y <= yhigh; ++y) { 262 (*g_array[indx])[x][y]->cancel(); 263 } 264 } 265 #if EXTRA_DEBUG 266 ++cancel_count[(xhigh - xlow + 1) * (yhigh - ylow + 1)]; 267 #endif 268 } 269 270 void restart_submaps(int &xval, int &yval, double &radius, int indx, SOM_element &vector) { 271 int xlow; 272 int xhigh; 273 int ylow; 274 int yhigh; 275 find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh); 276 for (int x = xlow; x <= xhigh; ++x) { 277 for (int y = ylow; y <= yhigh; ++y) { 278 // have to reset the graph 279 (*g_array[indx])[x][y]->reset(); 280 // and re-submit the exemplar for search. 281 (*s_array[indx])[x][y]->try_put(vector); 282 } 283 } 284 } 285 286 search_result_type graph_BMU(int indx) { // indx ranges over [0 .. SPECULATION_CNT -1] 287 wait_for_all_graphs(indx); // wait for the array of subgraphs 288 (g[indx])->wait_for_all(); 289 std::vector<search_result_type> all_srs(xRangeMax * yRangeMax, 290 search_result_type(DBL_MAX, -1, -1)); 291 #if EXTRA_DEBUG 292 int extra_computations = 0; 293 #endif 294 search_result_type sr; 295 search_result_type min_sr; 296 std::get<RADIUS>(min_sr) = DBL_MAX; 297 int result_count = 0; 298 while ((q[indx])->try_get(sr)) { 299 ++result_count; 300 // figure which submap this came from 301 int x = std::get<XV>(sr) / xsize; 302 int y = std::get<YV>(sr) / ysize; 303 #if EXTRA_DEBUG 304 if (x < 0 || x >= xranges) 305 printf(" ### x value out of range (%d)\n", x); 306 if (y < 0 || y >= yranges) 307 printf(" ### y value out of range (%d)\n", y); 308 #endif 309 int offset = x * yranges + y; // linearized subscript 310 #if EXTRA_DEBUG 311 if (std::get<RADIUS>(all_srs[offset]) != 312 DBL_MAX) { // we've already got a result from this subsquare 313 ++extra_computations; 314 } 315 else if (std::get<XV>(all_srs[offset]) != -1) { 316 if (extra_debug) 317 printf("More than one cancellation of [%d,%d] iteration %d\n", x, y, global_i); 318 } 319 #endif 320 all_srs[offset] = sr; 321 if (std::get<RADIUS>(sr) < std::get<RADIUS>(min_sr)) 322 min_sr = sr; 323 else if (std::get<RADIUS>(sr) == std::get<RADIUS>(min_sr)) { 324 if (std::get<XV>(sr) < std::get<XV>(min_sr)) { 325 min_sr = sr; 326 } 327 else if ((std::get<XV>(sr) == std::get<XV>(min_sr) && 328 std::get<YV>(sr) < std::get<YV>(min_sr))) { 329 min_sr = sr; 330 } 331 } 332 } 333 #if EXTRA_DEBUG 334 if (result_count != xranges * yranges + extra_computations) { 335 // we are missing at least one of the expected results. Tally the missing values 336 for (int i = 0; i < xranges * yranges; ++i) { 337 if (std::get<RADIUS>(all_srs[i]) == DBL_MAX) { 338 // i == x*yranges + y 339 int xval = i / yranges; 340 int yval = i % yranges; 341 bool received_cancel_result = std::get<XV>(all_srs[i]) != -1; 342 if (overlap(xval, yval, last_update)) { 343 // we have previously canceled this subsquare. 344 printf("No result for [%d,%d] which was canceled(%s)\n", 345 xval, 346 yval, 347 received_cancel_result ? "T" : "F"); 348 ++canceled_before[i]; 349 } 350 else { 351 printf("No result for [%d,%d] which was not canceled(%s)\n", 352 xval, 353 yval, 354 received_cancel_result ? "T" : "F"); 355 } 356 ++missing_count[i]; 357 } 358 } 359 } 360 if (extra_computations) 361 ++extra_count[extra_computations]; 362 #endif 363 return min_sr; 364 // end of one epoch 365 } 366 367 void graph_teach(SOMap &map1, teaching_vector_type &in, oneapi::tbb::task_arena &ta) { 368 build_BMU_graph(map1, ta); 369 #if EXTRA_DEBUG 370 cancel_count.clear(); 371 extra_count.clear(); 372 missing_count.clear(); 373 canceled_before.clear(); 374 cancel_count.reserve(xRangeMax * yRangeMax + 1); 375 extra_count.reserve(xRangeMax * yRangeMax + 1); 376 missing_count.reserve(xRangeMax * yRangeMax + 1); 377 canceled_before.reserve(xRangeMax * yRangeMax + 1); 378 for (int i = 0; i < xRangeMax * yRangeMax + 1; ++i) { 379 cancel_count.push_back(0); 380 extra_count.push_back(0); 381 missing_count.push_back(0); 382 canceled_before.push_back(0); 383 } 384 #endif 385 // normally the training would pick random exemplars to teach the SOM. We need 386 // the process to be reproducible, so we will pick the exemplars in order, [0, in.size()) 387 int next_j = 0; 388 for (int epoch = 0; epoch < nPasses; ++epoch) { 389 global_i = epoch; 390 bool canceled_submaps = false; 391 int j = next_j; // try to make reproducible 392 next_j = (epoch + 1) % in.size(); 393 search_result_type min_sr; 394 if (epoch < speculation_start) { 395 (send_to[epoch % SPECULATION_CNT])->try_put(in[j]); 396 } 397 else if (epoch == speculation_start) { 398 (send_to[epoch % SPECULATION_CNT])->try_put(in[j]); 399 if (epoch < nPasses - 1) { 400 (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]); 401 } 402 } 403 else if (epoch < nPasses - 1) { 404 (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]); 405 } 406 min_sr = graph_BMU(epoch % SPECULATION_CNT); //calls wait_for_all() 407 double min_distance = std::get<0>(min_sr); 408 double radius = max_radius * exp(-(double)epoch * radius_decay_rate); 409 double learning_rate = max_learning_rate * exp(-(double)epoch * learning_decay_rate); 410 if (epoch >= speculation_start && epoch < (nPasses - 1)) { 411 // have to cancel the affected submaps 412 cancel_submaps( 413 std::get<XV>(min_sr), std::get<YV>(min_sr), radius, (epoch + 1) % SPECULATION_CNT); 414 canceled_submaps = true; 415 } 416 map1.epoch_update( 417 in[j], epoch, std::get<1>(min_sr), std::get<2>(min_sr), radius, learning_rate); 418 ++global_i; 419 if (canceled_submaps) { 420 // do I have to wait for all the non-canceled speculative graph to complete first? 421 // yes, in case a canceled task was already executing. 422 wait_for_all_graphs((epoch + 1) % SPECULATION_CNT); // wait for the array of subgraphs 423 restart_submaps(std::get<1>(min_sr), 424 std::get<2>(min_sr), 425 radius, 426 (epoch + 1) % SPECULATION_CNT, 427 in[next_j]); 428 } 429 430 last_update = min_sr; 431 std::get<RADIUS>(last_update) = radius; // not smallest value, but range of effect 432 } 433 destroy_BMU_graph(); 434 } 435 436 static const double serial_time_adjust = 1.25; 437 static double radius_fraction = 3.0; 438 439 int main(int argc, char *argv[]) { 440 int l_speculation_start; 441 utility::thread_number_range threads( 442 utility::get_default_num_threads, 443 utility:: 444 get_default_num_threads() // run only the default number of threads if none specified 445 ); 446 447 utility::parse_cli_arguments( 448 argc, 449 argv, 450 utility::cli_argument_pack() 451 //"-h" option for for displaying help is present implicitly 452 .positional_arg( 453 threads, 454 "n-of-threads", 455 "number of threads to use; a range of the form low[:high], where low and optional high are non-negative integers or 'auto' for the TBB default.") 456 // .positional_arg(InputFileName,"input-file","input file name") 457 // .positional_arg(OutputFileName,"output-file","output file name") 458 .positional_arg( 459 radius_fraction, "radius-fraction", "size of radius at which to start speculating") 460 .positional_arg( 461 nPasses, "number-of-epochs", "number of examples used in learning phase") 462 .arg(cancel_test, "cancel-test", "test for cancel signal while finding BMU") 463 .arg(extra_debug, "debug", "additional output") 464 .arg(dont_speculate, "nospeculate", "don't speculate in SOM map teaching")); 465 466 readInputData(); 467 max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2; 468 // need this value for the 1x1 timing below 469 radius_decay_rate = -(log(1.0 / (double)max_radius) / (double)nPasses); 470 find_data_ranges(my_teaching, max_range, min_range); 471 if (extra_debug) { 472 printf("Data range: "); 473 remark_SOM_element(min_range); 474 printf(" to "); 475 remark_SOM_element(max_range); 476 printf("\n"); 477 } 478 479 // find how much time is taken for the single function_node case. 480 // adjust nPasses so the 1x1 time is somewhere around serial_time_adjust seconds. 481 // make sure the example test runs for at least 0.5 second. 482 for (;;) { 483 // Restrict max concurrency level via task_arena interface 484 oneapi::tbb::task_arena ta(1); 485 SOMap map1(xMax, yMax); 486 speculation_start = nPasses + 1; // Don't speculate 487 488 xranges = 1; 489 yranges = 1; 490 map1.initialize(InitializeGradient, max_range, min_range); 491 oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now(); 492 graph_teach(map1, my_teaching, ta); 493 oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now(); 494 double nSeconds = (t1 - t0).seconds(); 495 if (nSeconds < 0.5) { 496 xMax *= 2; 497 yMax *= 2; 498 continue; 499 } 500 double size_adjust = sqrt(serial_time_adjust / nSeconds); 501 xMax = (int)((double)xMax * size_adjust); 502 yMax = (int)((double)yMax * size_adjust); 503 max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2; 504 radius_decay_rate = log((double)max_radius) / (double)nPasses; 505 506 if (extra_debug) { 507 printf("original 1x1 case ran in %g seconds\n", nSeconds); 508 printf(" Size of table == %d x %d\n", xMax, yMax); 509 printf(" radius_decay_rate == %g\n", radius_decay_rate); 510 } 511 break; 512 } 513 514 // the "max_radius" starts at 1/2*radius_fraction the table size. To start the speculation when the radius is 515 // 1 / n * the table size, the constant in the log below should be n / 2. so 2 == 1/4, 3 == 1/6th, 516 // et c. 517 if (dont_speculate) { 518 l_speculation_start = nPasses + 1; 519 if (extra_debug) 520 printf("speculation will not be done\n"); 521 } 522 else { 523 if (radius_fraction < 1.0) { 524 if (extra_debug) 525 printf("Warning: radius_fraction should be >= 1. Setting to 1.\n"); 526 radius_fraction = 1.0; 527 } 528 l_speculation_start = (int)((double)nPasses * log(radius_fraction) / log((double)nPasses)); 529 if (extra_debug) 530 printf("We will start speculation at iteration %d\n", l_speculation_start); 531 } 532 double single_time; // for speedup calculations 533 #if EXTRA_DEBUG 534 // storage for the single-subrange answers, for comparing maps 535 std::vector<double> single_dist; 536 single_dist.reserve(my_teaching.size()); 537 std::vector<int> single_xval; 538 single_xval.reserve(my_teaching.size()); 539 std::vector<int> single_yval; 540 single_yval.reserve(my_teaching.size()); 541 #endif 542 //TODO: Investigate how to not require mandatory concurrency 543 for (int p = std::max(threads.first, 2); p <= std::max(threads.last, 2); ++p) { 544 // Restrict max concurrency level via task_arena interface 545 oneapi::tbb::global_control limit(oneapi::tbb::global_control::max_allowed_parallelism, p); 546 oneapi::tbb::task_arena ta(p); 547 if (extra_debug) 548 printf(" -------------- Running with %d threads. ------------\n", p); 549 // run the SOM build for a series of subranges 550 for (xranges = 1; xranges <= xRangeMax; ++xranges) { 551 for (yranges = xranges; yranges <= yRangeMax; ++yranges) { 552 if (xranges == 1 && yranges == 1) { 553 // don't pointlessly speculate if we're only running one subrange. 554 speculation_start = nPasses + 1; 555 } 556 else { 557 speculation_start = l_speculation_start; 558 } 559 SOMap map1(xMax, yMax); 560 map1.initialize(InitializeGradient, max_range, min_range); 561 562 if (extra_debug) 563 printf("Start learning for [%d,%d] ----------- \n", xranges, yranges); 564 oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now(); 565 graph_teach(map1, my_teaching, ta); 566 oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now(); 567 568 if (extra_debug) 569 printf("Done learning for [%d,%d], which took %g seconds ", 570 xranges, 571 yranges, 572 (t1 - t0).seconds()); 573 if (xranges == 1 && yranges == 1) 574 single_time = (t1 - t0).seconds(); 575 if (extra_debug) 576 printf(": speedup == %g\n", single_time / (t1 - t0).seconds()); 577 578 #if EXTRA_DEBUG 579 if (extra_debug) { 580 // number of times cancel was called, indexed by number of subranges canceled 581 for (int i = 0; i < cancel_count.size(); ++i) { 582 // only write output if we have a non-zero value. 583 if (cancel_count[i] > 0) { 584 int totalcnt = 0; 585 printf(" cancellations: "); 586 for (int j = 0; j < cancel_count.size(); ++j) { 587 if (cancel_count[j]) { 588 printf(" %d [%d]", j, cancel_count[j]); 589 totalcnt += cancel_count[j]; 590 } 591 } 592 totalcnt += speculation_start; 593 printf(" for a total of %d\n", totalcnt); 594 break; // from for 595 } 596 } 597 598 // number of extra results (these occur when the subrange task starts before 599 // cancel is received.) 600 for (int i = 0; i < extra_count.size(); ++i) { 601 if (extra_count[i] > 0) { 602 int totalcnt = 0; 603 printf("extra computations: "); 604 for (int j = 0; j < extra_count.size(); ++j) { 605 if (extra_count[j]) { 606 printf(" %d[%d]", j, extra_count[j]); 607 totalcnt += extra_count[j]; 608 } 609 } 610 totalcnt += speculation_start; 611 printf(" for a total of %d\n", totalcnt); 612 break; // from for 613 } 614 } 615 616 // here we count the number of times we looked for a particular subrange when fetching 617 // the queue_node output and didn't find anything. This may occur when a function_node 618 // is "stuck" and doesn't process some number of exemplars. function_node_execs is 619 // a count of the number of times the corresponding function_node was executed (in 620 // case the problem is dropped output in the queue_node.) 621 for (int i = 0; i < missing_count.size(); ++i) { 622 if (missing_count[i]) { 623 int xval = i / yranges; 624 int yval = i % yranges; 625 printf(" f_node[%d,%d] missed %d values", xval, yval, missing_count[i]); 626 if (canceled_before[i]) { 627 printf(" canceled_before == %d", canceled_before[i]); 628 } 629 printf(", fn_tally == %d\n", function_node_execs[i]); 630 } 631 } 632 } 633 634 // check that output matches the 1x1 case 635 for (int i = 0; i < my_teaching.size(); ++i) { 636 int xdist; 637 int ydist; 638 double my_dist = map1.BMU(my_teaching[i], xdist, ydist); 639 if (xranges == 1 && yranges == 1) { 640 single_dist.push_back(my_dist); 641 single_xval.push_back(xdist); 642 single_yval.push_back(ydist); 643 } 644 else { 645 if (single_dist[i] != my_dist || single_xval[i] != xdist || 646 single_yval[i] != ydist) 647 printf( 648 "Error in output: expecting <%g, %d, %d>, but got <%g, %d, %d>\n", 649 single_dist[i], 650 single_xval[i], 651 single_yval[i], 652 my_dist, 653 xdist, 654 ydist); 655 } 656 } 657 #endif 658 } // yranges 659 } // xranges 660 } // #threads p 661 printf("done\n"); 662 return 0; 663 } 664