1d86ed7fbStbbdev /*
2b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev //
18d86ed7fbStbbdev // Self-organizing map in TBB flow::graph
19d86ed7fbStbbdev //
20d86ed7fbStbbdev // This is an example of the use of cancellation in a graph. After a point in searching for
21d86ed7fbStbbdev // the best match for an example, two examples are looked for simultaneously. When the
22d86ed7fbStbbdev // earlier example is found and the update radius is determined, the affected searches
23d86ed7fbStbbdev // for the subsequent example are cancelled, and after the update they are restarted.
24d86ed7fbStbbdev // As the update radius shrinks fewer searches are cancelled, and by the last iterations
25d86ed7fbStbbdev // virtually all the work done for the speculating example is useful.
26d86ed7fbStbbdev //
27d86ed7fbStbbdev // first, a simple implementation with only one example vector
28d86ed7fbStbbdev // at a time.
29d86ed7fbStbbdev //
30d86ed7fbStbbdev // we will do a color map (the simple example.)
31d86ed7fbStbbdev //
32d86ed7fbStbbdev // graph algorithm
33d86ed7fbStbbdev //
34d86ed7fbStbbdev // for some number of iterations
35d86ed7fbStbbdev // update radius r, weight of change L
36d86ed7fbStbbdev // for each example V
37d86ed7fbStbbdev // use graph to find BMU
38d86ed7fbStbbdev // for each part of map within radius of BMU W
39d86ed7fbStbbdev // update vector: W(t+1) = W(t) + w(dist)*L*(V - W(t))
40d86ed7fbStbbdev
41*e1b24895SIvan Kochin #ifndef NOMINMAX
42*e1b24895SIvan Kochin #define NOMINMAX
43*e1b24895SIvan Kochin #endif // NOMINMAX
44*e1b24895SIvan Kochin
45*e1b24895SIvan Kochin #include <algorithm>
46*e1b24895SIvan Kochin
47d86ed7fbStbbdev #define _MAIN_C_ 1
48d86ed7fbStbbdev #include "som.hpp"
49d86ed7fbStbbdev
50d86ed7fbStbbdev #include "oneapi/tbb/flow_graph.h"
51d86ed7fbStbbdev #include "oneapi/tbb/blocked_range2d.h"
52d86ed7fbStbbdev #include "oneapi/tbb/tick_count.h"
53d86ed7fbStbbdev #include "oneapi/tbb/task_arena.h"
54*e1b24895SIvan Kochin #include "oneapi/tbb/global_control.h"
55d86ed7fbStbbdev
56d86ed7fbStbbdev #include "common/utility/utility.hpp"
57d86ed7fbStbbdev #include "common/utility/get_default_num_threads.hpp"
58d86ed7fbStbbdev
59d86ed7fbStbbdev #define RED 0
60d86ed7fbStbbdev #define GREEN 1
61d86ed7fbStbbdev #define BLUE 2
62d86ed7fbStbbdev
63d86ed7fbStbbdev static int xranges = 1;
64d86ed7fbStbbdev static int yranges = 1;
65d86ed7fbStbbdev static int xsize = -1;
66d86ed7fbStbbdev static int ysize = -1;
67d86ed7fbStbbdev
68d86ed7fbStbbdev static int global_i = 0;
69d86ed7fbStbbdev static int speculation_start;
70d86ed7fbStbbdev
71d86ed7fbStbbdev #if EXTRA_DEBUG
72d86ed7fbStbbdev std::vector<int> cancel_count;
73d86ed7fbStbbdev std::vector<int> extra_count;
74d86ed7fbStbbdev std::vector<int> missing_count;
75d86ed7fbStbbdev std::vector<int> canceled_before;
76d86ed7fbStbbdev #endif
77d86ed7fbStbbdev std::vector<int> function_node_execs;
78d86ed7fbStbbdev static int xRangeMax = 3;
79d86ed7fbStbbdev static int yRangeMax = 3;
80d86ed7fbStbbdev static bool dont_speculate = false;
81d86ed7fbStbbdev static search_result_type last_update;
82d86ed7fbStbbdev
83d86ed7fbStbbdev class BMU_search_body {
84d86ed7fbStbbdev SOMap &my_map;
85d86ed7fbStbbdev subsquare_type my_square;
86d86ed7fbStbbdev int &fn_tally;
87d86ed7fbStbbdev
88d86ed7fbStbbdev public:
BMU_search_body(SOMap & _m,subsquare_type & _sq,int & fnt)89d86ed7fbStbbdev BMU_search_body(SOMap &_m, subsquare_type &_sq, int &fnt)
90d86ed7fbStbbdev : my_map(_m),
91d86ed7fbStbbdev my_square(_sq),
92d86ed7fbStbbdev fn_tally(fnt) {}
BMU_search_body(const BMU_search_body & other)93d86ed7fbStbbdev BMU_search_body(const BMU_search_body &other)
94d86ed7fbStbbdev : my_map(other.my_map),
95d86ed7fbStbbdev my_square(other.my_square),
96d86ed7fbStbbdev fn_tally(other.fn_tally) {}
operator ()(const SOM_element s)97d86ed7fbStbbdev search_result_type operator()(const SOM_element s) {
98d86ed7fbStbbdev int my_x;
99d86ed7fbStbbdev int my_y;
100d86ed7fbStbbdev double min_dist = my_map.BMU_range(s, my_x, my_y, my_square);
101d86ed7fbStbbdev ++fn_tally; // count how many times this function_node executed
102d86ed7fbStbbdev return search_result_type(min_dist, my_x, my_y);
103d86ed7fbStbbdev }
104d86ed7fbStbbdev };
105d86ed7fbStbbdev
106d86ed7fbStbbdev typedef oneapi::tbb::flow::function_node<SOM_element, search_result_type> search_node;
107d86ed7fbStbbdev typedef oneapi::tbb::flow::broadcast_node<SOM_element> b_node;
108d86ed7fbStbbdev typedef std::vector<search_node *> search_node_vector_type;
109d86ed7fbStbbdev typedef std::vector<search_node_vector_type> search_node_array_type;
110d86ed7fbStbbdev typedef std::vector<oneapi::tbb::flow::graph *> graph_vector_type;
111d86ed7fbStbbdev typedef std::vector<graph_vector_type> graph_array_type;
112d86ed7fbStbbdev
113d86ed7fbStbbdev #define SPECULATION_CNT 2
114d86ed7fbStbbdev
115d86ed7fbStbbdev oneapi::tbb::flow::graph *g[SPECULATION_CNT]; // main graph; there should only be one per epoch
116d86ed7fbStbbdev b_node *send_to[SPECULATION_CNT]; // broadcast node to send exemplar to all function_nodes
117d86ed7fbStbbdev oneapi::tbb::flow::queue_node<search_result_type>
118d86ed7fbStbbdev *q[SPECULATION_CNT]; // queue for function nodes to put their results in
119d86ed7fbStbbdev // each function_node should have its own graph
120d86ed7fbStbbdev search_node_array_type *s_array[SPECULATION_CNT]; // 2d array of function nodes
121d86ed7fbStbbdev graph_array_type *g_array[SPECULATION_CNT]; // 2d array of graphs
122d86ed7fbStbbdev
123d86ed7fbStbbdev // All graphs must locate in the same arena.
construct_graph(oneapi::tbb::task_arena & ta)124d86ed7fbStbbdev oneapi::tbb::flow::graph *construct_graph(oneapi::tbb::task_arena &ta) {
125d86ed7fbStbbdev oneapi::tbb::flow::graph *result;
126d86ed7fbStbbdev ta.execute([&result] {
127d86ed7fbStbbdev result = new oneapi::tbb::flow::graph();
128d86ed7fbStbbdev });
129d86ed7fbStbbdev return result;
130d86ed7fbStbbdev }
131d86ed7fbStbbdev
132d86ed7fbStbbdev // build a set of SPECULATION_CNT graphs, each of which consists of a broadcast_node,
133d86ed7fbStbbdev // xranges x yranges function_nodes, and one queue_node for output.
134d86ed7fbStbbdev // once speculation starts, if i % SPECULATION_CNT is the current graph, (i+1) % SPECULATION_CNT
135d86ed7fbStbbdev // is the first speculation, and so on.
build_BMU_graph(SOMap & map1,oneapi::tbb::task_arena & ta)136d86ed7fbStbbdev void build_BMU_graph(SOMap &map1, oneapi::tbb::task_arena &ta) {
137d86ed7fbStbbdev // build current graph
138d86ed7fbStbbdev xsize = ((int)map1.size() + xranges - 1) / xranges;
139d86ed7fbStbbdev ysize = ((int)map1[0].size() + yranges - 1) / yranges;
140d86ed7fbStbbdev function_node_execs.clear();
141d86ed7fbStbbdev function_node_execs.reserve(xranges * yranges + 1);
142d86ed7fbStbbdev for (int i = 0; i < xranges * yranges + 1; ++i)
143d86ed7fbStbbdev function_node_execs.push_back(0);
144d86ed7fbStbbdev
145d86ed7fbStbbdev for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) {
146d86ed7fbStbbdev g[scnt] = construct_graph(ta);
147d86ed7fbStbbdev send_to[scnt] = new b_node(*(g[scnt])); // broadcast node to the function_nodes
148d86ed7fbStbbdev q[scnt] = new oneapi::tbb::flow::queue_node<search_result_type>(*(g[scnt])); // output queue
149d86ed7fbStbbdev
150d86ed7fbStbbdev // create the function_nodes, tie to the graph
151d86ed7fbStbbdev s_array[scnt] = new search_node_array_type;
152d86ed7fbStbbdev s_array[scnt]->reserve(xranges);
153d86ed7fbStbbdev g_array[scnt] = new graph_array_type;
154d86ed7fbStbbdev g_array[scnt]->reserve(xranges);
155d86ed7fbStbbdev for (int i = 0; i < (int)map1.size(); i += xsize) {
156d86ed7fbStbbdev int xindex = i / xsize;
157d86ed7fbStbbdev s_array[scnt]->push_back(search_node_vector_type());
158d86ed7fbStbbdev #if EXTRA_DEBUG
159d86ed7fbStbbdev if (s_array[scnt]->size() != xindex + 1) {
160d86ed7fbStbbdev printf("Error; s_array[%d]->size() == %d, xindex== %d\n",
161d86ed7fbStbbdev scnt,
162d86ed7fbStbbdev (int)(s_array[scnt]->size()),
163d86ed7fbStbbdev xindex);
164d86ed7fbStbbdev }
165d86ed7fbStbbdev #endif
166d86ed7fbStbbdev (*s_array[scnt])[xindex].reserve(yranges);
167d86ed7fbStbbdev g_array[scnt]->push_back(graph_vector_type());
168d86ed7fbStbbdev (*g_array[scnt])[xindex].reserve(yranges);
169d86ed7fbStbbdev for (int j = 0; j < (int)map1[0].size(); j += ysize) {
170d86ed7fbStbbdev int offset = (i / xsize) * yranges + (j / ysize);
171d86ed7fbStbbdev int xmax = (i + xsize) > (int)map1.size() ? (int)map1.size() : i + xsize;
172d86ed7fbStbbdev int ymax = (j + ysize) > (int)map1[0].size() ? (int)map1[0].size() : j + ysize;
173d86ed7fbStbbdev subsquare_type sst(i, xmax, 1, j, ymax, 1);
174d86ed7fbStbbdev BMU_search_body bb(map1, sst, function_node_execs[offset]);
175d86ed7fbStbbdev oneapi::tbb::flow::graph *g_local = construct_graph(ta);
176d86ed7fbStbbdev search_node *s =
177d86ed7fbStbbdev new search_node(*g_local, oneapi::tbb::flow::serial, bb); // copies Body
178d86ed7fbStbbdev (*g_array[scnt])[xindex].push_back(g_local);
179d86ed7fbStbbdev (*s_array[scnt])[xindex].push_back(s);
180d86ed7fbStbbdev oneapi::tbb::flow::make_edge(*(send_to[scnt]),
181d86ed7fbStbbdev *s); // broadcast_node -> function_node
182d86ed7fbStbbdev oneapi::tbb::flow::make_edge(*s, *(q[scnt])); // function_node -> queue_node
183d86ed7fbStbbdev }
184d86ed7fbStbbdev }
185d86ed7fbStbbdev }
186d86ed7fbStbbdev }
187d86ed7fbStbbdev
188d86ed7fbStbbdev // Wait for the 2D array of flow::graphs.
wait_for_all_graphs(int cIndex)189d86ed7fbStbbdev void wait_for_all_graphs(int cIndex) { // cIndex ranges over [0 .. SPECULATION_CNT - 1]
190d86ed7fbStbbdev for (int x = 0; x < xranges; ++x) {
191d86ed7fbStbbdev for (int y = 0; y < yranges; ++y) {
192d86ed7fbStbbdev (*g_array[cIndex])[x][y]->wait_for_all();
193d86ed7fbStbbdev }
194d86ed7fbStbbdev }
195d86ed7fbStbbdev }
196d86ed7fbStbbdev
destroy_BMU_graph()197d86ed7fbStbbdev void destroy_BMU_graph() {
198d86ed7fbStbbdev for (int scnt = 0; scnt < SPECULATION_CNT; ++scnt) {
199d86ed7fbStbbdev for (int i = 0; i < (int)(*s_array[scnt]).size(); ++i) {
200d86ed7fbStbbdev for (int j = 0; j < (int)(*s_array[scnt])[i].size(); ++j) {
201d86ed7fbStbbdev delete (*s_array[scnt])[i][j];
202d86ed7fbStbbdev delete (*g_array[scnt])[i][j];
203d86ed7fbStbbdev }
204d86ed7fbStbbdev }
205d86ed7fbStbbdev (*s_array[scnt]).clear();
206d86ed7fbStbbdev delete s_array[scnt];
207d86ed7fbStbbdev (*g_array[scnt]).clear();
208d86ed7fbStbbdev delete g_array[scnt];
209d86ed7fbStbbdev delete q[scnt];
210d86ed7fbStbbdev delete send_to[scnt];
211d86ed7fbStbbdev delete g[scnt];
212d86ed7fbStbbdev }
213d86ed7fbStbbdev }
214d86ed7fbStbbdev
find_subrange_overlap(int const & xval,int const & yval,double const & radius,int & xlow,int & xhigh,int & ylow,int & yhigh)215d86ed7fbStbbdev void find_subrange_overlap(int const &xval,
216d86ed7fbStbbdev int const &yval,
217d86ed7fbStbbdev double const &radius,
218d86ed7fbStbbdev int &xlow,
219d86ed7fbStbbdev int &xhigh,
220d86ed7fbStbbdev int &ylow,
221d86ed7fbStbbdev int &yhigh) {
222d86ed7fbStbbdev xlow = int((xval - radius) / xsize);
223d86ed7fbStbbdev xhigh = int((xval + radius) / xsize);
224d86ed7fbStbbdev ylow = int((yval - radius) / ysize);
225d86ed7fbStbbdev yhigh = int((yval + radius) / ysize);
226d86ed7fbStbbdev // circle may fall partly outside map
227d86ed7fbStbbdev if (xlow < 0)
228d86ed7fbStbbdev xlow = 0;
229d86ed7fbStbbdev if (xhigh >= xranges)
230d86ed7fbStbbdev xhigh = xranges - 1;
231d86ed7fbStbbdev if (ylow < 0)
232d86ed7fbStbbdev ylow = 0;
233d86ed7fbStbbdev if (yhigh >= yranges)
234d86ed7fbStbbdev yhigh = yranges - 1;
235d86ed7fbStbbdev #if EXTRA_DEBUG
236d86ed7fbStbbdev if (xlow >= xranges)
237d86ed7fbStbbdev printf(" Error *** xlow == %d\n", xlow);
238d86ed7fbStbbdev if (xhigh < 0)
239d86ed7fbStbbdev printf("Error *** xhigh == %d\n", xhigh);
240d86ed7fbStbbdev if (ylow >= yranges)
241d86ed7fbStbbdev printf("Error *** ylow == %d\n", ylow);
242d86ed7fbStbbdev if (yhigh < 0)
243d86ed7fbStbbdev printf("Error *** yhigh == %d\n", yhigh);
244d86ed7fbStbbdev #endif
245d86ed7fbStbbdev }
246d86ed7fbStbbdev
overlap(int & xval,int & yval,search_result_type & sr)247d86ed7fbStbbdev bool overlap(int &xval, int &yval, search_result_type &sr) {
248d86ed7fbStbbdev int xlow, xhigh, ylow, yhigh;
249d86ed7fbStbbdev find_subrange_overlap(
250d86ed7fbStbbdev std::get<XV>(sr), std::get<YV>(sr), std::get<RADIUS>(sr), xlow, xhigh, ylow, yhigh);
251d86ed7fbStbbdev return xval >= xlow && xval <= xhigh && yval >= ylow && yval <= yhigh;
252d86ed7fbStbbdev }
253d86ed7fbStbbdev
cancel_submaps(int & xval,int & yval,double & radius,int indx)254d86ed7fbStbbdev void cancel_submaps(int &xval, int &yval, double &radius, int indx) {
255d86ed7fbStbbdev int xlow;
256d86ed7fbStbbdev int xhigh;
257d86ed7fbStbbdev int ylow;
258d86ed7fbStbbdev int yhigh;
259d86ed7fbStbbdev find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh);
260d86ed7fbStbbdev for (int x = xlow; x <= xhigh; ++x) {
261d86ed7fbStbbdev for (int y = ylow; y <= yhigh; ++y) {
262d86ed7fbStbbdev (*g_array[indx])[x][y]->cancel();
263d86ed7fbStbbdev }
264d86ed7fbStbbdev }
265d86ed7fbStbbdev #if EXTRA_DEBUG
266d86ed7fbStbbdev ++cancel_count[(xhigh - xlow + 1) * (yhigh - ylow + 1)];
267d86ed7fbStbbdev #endif
268d86ed7fbStbbdev }
269d86ed7fbStbbdev
restart_submaps(int & xval,int & yval,double & radius,int indx,SOM_element & vector)270d86ed7fbStbbdev void restart_submaps(int &xval, int &yval, double &radius, int indx, SOM_element &vector) {
271d86ed7fbStbbdev int xlow;
272d86ed7fbStbbdev int xhigh;
273d86ed7fbStbbdev int ylow;
274d86ed7fbStbbdev int yhigh;
275d86ed7fbStbbdev find_subrange_overlap(xval, yval, radius, xlow, xhigh, ylow, yhigh);
276d86ed7fbStbbdev for (int x = xlow; x <= xhigh; ++x) {
277d86ed7fbStbbdev for (int y = ylow; y <= yhigh; ++y) {
278d86ed7fbStbbdev // have to reset the graph
279d86ed7fbStbbdev (*g_array[indx])[x][y]->reset();
280d86ed7fbStbbdev // and re-submit the exemplar for search.
281d86ed7fbStbbdev (*s_array[indx])[x][y]->try_put(vector);
282d86ed7fbStbbdev }
283d86ed7fbStbbdev }
284d86ed7fbStbbdev }
285d86ed7fbStbbdev
graph_BMU(int indx)286d86ed7fbStbbdev search_result_type graph_BMU(int indx) { // indx ranges over [0 .. SPECULATION_CNT -1]
287d86ed7fbStbbdev wait_for_all_graphs(indx); // wait for the array of subgraphs
288d86ed7fbStbbdev (g[indx])->wait_for_all();
289d86ed7fbStbbdev std::vector<search_result_type> all_srs(xRangeMax * yRangeMax,
290d86ed7fbStbbdev search_result_type(DBL_MAX, -1, -1));
291d86ed7fbStbbdev #if EXTRA_DEBUG
292d86ed7fbStbbdev int extra_computations = 0;
293d86ed7fbStbbdev #endif
294d86ed7fbStbbdev search_result_type sr;
295d86ed7fbStbbdev search_result_type min_sr;
296d86ed7fbStbbdev std::get<RADIUS>(min_sr) = DBL_MAX;
297d86ed7fbStbbdev int result_count = 0;
298d86ed7fbStbbdev while ((q[indx])->try_get(sr)) {
299d86ed7fbStbbdev ++result_count;
300d86ed7fbStbbdev // figure which submap this came from
301d86ed7fbStbbdev int x = std::get<XV>(sr) / xsize;
302d86ed7fbStbbdev int y = std::get<YV>(sr) / ysize;
303d86ed7fbStbbdev #if EXTRA_DEBUG
304d86ed7fbStbbdev if (x < 0 || x >= xranges)
305d86ed7fbStbbdev printf(" ### x value out of range (%d)\n", x);
306d86ed7fbStbbdev if (y < 0 || y >= yranges)
307d86ed7fbStbbdev printf(" ### y value out of range (%d)\n", y);
308d86ed7fbStbbdev #endif
309d86ed7fbStbbdev int offset = x * yranges + y; // linearized subscript
310d86ed7fbStbbdev #if EXTRA_DEBUG
311d86ed7fbStbbdev if (std::get<RADIUS>(all_srs[offset]) !=
312d86ed7fbStbbdev DBL_MAX) { // we've already got a result from this subsquare
313d86ed7fbStbbdev ++extra_computations;
314d86ed7fbStbbdev }
315d86ed7fbStbbdev else if (std::get<XV>(all_srs[offset]) != -1) {
316d86ed7fbStbbdev if (extra_debug)
317d86ed7fbStbbdev printf("More than one cancellation of [%d,%d] iteration %d\n", x, y, global_i);
318d86ed7fbStbbdev }
319d86ed7fbStbbdev #endif
320d86ed7fbStbbdev all_srs[offset] = sr;
321d86ed7fbStbbdev if (std::get<RADIUS>(sr) < std::get<RADIUS>(min_sr))
322d86ed7fbStbbdev min_sr = sr;
323d86ed7fbStbbdev else if (std::get<RADIUS>(sr) == std::get<RADIUS>(min_sr)) {
324d86ed7fbStbbdev if (std::get<XV>(sr) < std::get<XV>(min_sr)) {
325d86ed7fbStbbdev min_sr = sr;
326d86ed7fbStbbdev }
327d86ed7fbStbbdev else if ((std::get<XV>(sr) == std::get<XV>(min_sr) &&
328d86ed7fbStbbdev std::get<YV>(sr) < std::get<YV>(min_sr))) {
329d86ed7fbStbbdev min_sr = sr;
330d86ed7fbStbbdev }
331d86ed7fbStbbdev }
332d86ed7fbStbbdev }
333d86ed7fbStbbdev #if EXTRA_DEBUG
334d86ed7fbStbbdev if (result_count != xranges * yranges + extra_computations) {
335d86ed7fbStbbdev // we are missing at least one of the expected results. Tally the missing values
336d86ed7fbStbbdev for (int i = 0; i < xranges * yranges; ++i) {
337d86ed7fbStbbdev if (std::get<RADIUS>(all_srs[i]) == DBL_MAX) {
338d86ed7fbStbbdev // i == x*yranges + y
339d86ed7fbStbbdev int xval = i / yranges;
340d86ed7fbStbbdev int yval = i % yranges;
341d86ed7fbStbbdev bool received_cancel_result = std::get<XV>(all_srs[i]) != -1;
342d86ed7fbStbbdev if (overlap(xval, yval, last_update)) {
343d86ed7fbStbbdev // we have previously canceled this subsquare.
344d86ed7fbStbbdev printf("No result for [%d,%d] which was canceled(%s)\n",
345d86ed7fbStbbdev xval,
346d86ed7fbStbbdev yval,
347d86ed7fbStbbdev received_cancel_result ? "T" : "F");
348d86ed7fbStbbdev ++canceled_before[i];
349d86ed7fbStbbdev }
350d86ed7fbStbbdev else {
351d86ed7fbStbbdev printf("No result for [%d,%d] which was not canceled(%s)\n",
352d86ed7fbStbbdev xval,
353d86ed7fbStbbdev yval,
354d86ed7fbStbbdev received_cancel_result ? "T" : "F");
355d86ed7fbStbbdev }
356d86ed7fbStbbdev ++missing_count[i];
357d86ed7fbStbbdev }
358d86ed7fbStbbdev }
359d86ed7fbStbbdev }
360d86ed7fbStbbdev if (extra_computations)
361d86ed7fbStbbdev ++extra_count[extra_computations];
362d86ed7fbStbbdev #endif
363d86ed7fbStbbdev return min_sr;
364d86ed7fbStbbdev // end of one epoch
365d86ed7fbStbbdev }
366d86ed7fbStbbdev
graph_teach(SOMap & map1,teaching_vector_type & in,oneapi::tbb::task_arena & ta)367d86ed7fbStbbdev void graph_teach(SOMap &map1, teaching_vector_type &in, oneapi::tbb::task_arena &ta) {
368d86ed7fbStbbdev build_BMU_graph(map1, ta);
369d86ed7fbStbbdev #if EXTRA_DEBUG
370d86ed7fbStbbdev cancel_count.clear();
371d86ed7fbStbbdev extra_count.clear();
372d86ed7fbStbbdev missing_count.clear();
373d86ed7fbStbbdev canceled_before.clear();
374d86ed7fbStbbdev cancel_count.reserve(xRangeMax * yRangeMax + 1);
375d86ed7fbStbbdev extra_count.reserve(xRangeMax * yRangeMax + 1);
376d86ed7fbStbbdev missing_count.reserve(xRangeMax * yRangeMax + 1);
377d86ed7fbStbbdev canceled_before.reserve(xRangeMax * yRangeMax + 1);
378*e1b24895SIvan Kochin for (int i = 0; i < xRangeMax * yRangeMax + 1; ++i) {
379d86ed7fbStbbdev cancel_count.push_back(0);
380d86ed7fbStbbdev extra_count.push_back(0);
381d86ed7fbStbbdev missing_count.push_back(0);
382d86ed7fbStbbdev canceled_before.push_back(0);
383d86ed7fbStbbdev }
384d86ed7fbStbbdev #endif
385d86ed7fbStbbdev // normally the training would pick random exemplars to teach the SOM. We need
386d86ed7fbStbbdev // the process to be reproducible, so we will pick the exemplars in order, [0, in.size())
387d86ed7fbStbbdev int next_j = 0;
388d86ed7fbStbbdev for (int epoch = 0; epoch < nPasses; ++epoch) {
389d86ed7fbStbbdev global_i = epoch;
390d86ed7fbStbbdev bool canceled_submaps = false;
391d86ed7fbStbbdev int j = next_j; // try to make reproducible
392d86ed7fbStbbdev next_j = (epoch + 1) % in.size();
393d86ed7fbStbbdev search_result_type min_sr;
394d86ed7fbStbbdev if (epoch < speculation_start) {
395d86ed7fbStbbdev (send_to[epoch % SPECULATION_CNT])->try_put(in[j]);
396d86ed7fbStbbdev }
397d86ed7fbStbbdev else if (epoch == speculation_start) {
398d86ed7fbStbbdev (send_to[epoch % SPECULATION_CNT])->try_put(in[j]);
399d86ed7fbStbbdev if (epoch < nPasses - 1) {
400d86ed7fbStbbdev (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]);
401d86ed7fbStbbdev }
402d86ed7fbStbbdev }
403d86ed7fbStbbdev else if (epoch < nPasses - 1) {
404d86ed7fbStbbdev (send_to[(epoch + 1) % SPECULATION_CNT])->try_put(in[next_j]);
405d86ed7fbStbbdev }
406d86ed7fbStbbdev min_sr = graph_BMU(epoch % SPECULATION_CNT); //calls wait_for_all()
407d86ed7fbStbbdev double min_distance = std::get<0>(min_sr);
408d86ed7fbStbbdev double radius = max_radius * exp(-(double)epoch * radius_decay_rate);
409d86ed7fbStbbdev double learning_rate = max_learning_rate * exp(-(double)epoch * learning_decay_rate);
410d86ed7fbStbbdev if (epoch >= speculation_start && epoch < (nPasses - 1)) {
411d86ed7fbStbbdev // have to cancel the affected submaps
412d86ed7fbStbbdev cancel_submaps(
413d86ed7fbStbbdev std::get<XV>(min_sr), std::get<YV>(min_sr), radius, (epoch + 1) % SPECULATION_CNT);
414d86ed7fbStbbdev canceled_submaps = true;
415d86ed7fbStbbdev }
416d86ed7fbStbbdev map1.epoch_update(
417d86ed7fbStbbdev in[j], epoch, std::get<1>(min_sr), std::get<2>(min_sr), radius, learning_rate);
418d86ed7fbStbbdev ++global_i;
419d86ed7fbStbbdev if (canceled_submaps) {
420d86ed7fbStbbdev // do I have to wait for all the non-canceled speculative graph to complete first?
421d86ed7fbStbbdev // yes, in case a canceled task was already executing.
422d86ed7fbStbbdev wait_for_all_graphs((epoch + 1) % SPECULATION_CNT); // wait for the array of subgraphs
423d86ed7fbStbbdev restart_submaps(std::get<1>(min_sr),
424d86ed7fbStbbdev std::get<2>(min_sr),
425d86ed7fbStbbdev radius,
426d86ed7fbStbbdev (epoch + 1) % SPECULATION_CNT,
427d86ed7fbStbbdev in[next_j]);
428d86ed7fbStbbdev }
429d86ed7fbStbbdev
430d86ed7fbStbbdev last_update = min_sr;
431d86ed7fbStbbdev std::get<RADIUS>(last_update) = radius; // not smallest value, but range of effect
432d86ed7fbStbbdev }
433d86ed7fbStbbdev destroy_BMU_graph();
434d86ed7fbStbbdev }
435d86ed7fbStbbdev
436d86ed7fbStbbdev static const double serial_time_adjust = 1.25;
437d86ed7fbStbbdev static double radius_fraction = 3.0;
438d86ed7fbStbbdev
main(int argc,char * argv[])439d86ed7fbStbbdev int main(int argc, char *argv[]) {
440d86ed7fbStbbdev int l_speculation_start;
441d86ed7fbStbbdev utility::thread_number_range threads(
442d86ed7fbStbbdev utility::get_default_num_threads,
443d86ed7fbStbbdev utility::
444d86ed7fbStbbdev get_default_num_threads() // run only the default number of threads if none specified
445d86ed7fbStbbdev );
446d86ed7fbStbbdev
447d86ed7fbStbbdev utility::parse_cli_arguments(
448d86ed7fbStbbdev argc,
449d86ed7fbStbbdev argv,
450d86ed7fbStbbdev utility::cli_argument_pack()
451d86ed7fbStbbdev //"-h" option for for displaying help is present implicitly
452d86ed7fbStbbdev .positional_arg(
453d86ed7fbStbbdev threads,
454d86ed7fbStbbdev "n-of-threads",
455d86ed7fbStbbdev "number of threads to use; a range of the form low[:high], where low and optional high are non-negative integers or 'auto' for the TBB default.")
456d86ed7fbStbbdev // .positional_arg(InputFileName,"input-file","input file name")
457d86ed7fbStbbdev // .positional_arg(OutputFileName,"output-file","output file name")
458d86ed7fbStbbdev .positional_arg(
459d86ed7fbStbbdev radius_fraction, "radius-fraction", "size of radius at which to start speculating")
460d86ed7fbStbbdev .positional_arg(
461d86ed7fbStbbdev nPasses, "number-of-epochs", "number of examples used in learning phase")
462d86ed7fbStbbdev .arg(cancel_test, "cancel-test", "test for cancel signal while finding BMU")
463d86ed7fbStbbdev .arg(extra_debug, "debug", "additional output")
464d86ed7fbStbbdev .arg(dont_speculate, "nospeculate", "don't speculate in SOM map teaching"));
465d86ed7fbStbbdev
466d86ed7fbStbbdev readInputData();
467d86ed7fbStbbdev max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2;
468d86ed7fbStbbdev // need this value for the 1x1 timing below
469d86ed7fbStbbdev radius_decay_rate = -(log(1.0 / (double)max_radius) / (double)nPasses);
470d86ed7fbStbbdev find_data_ranges(my_teaching, max_range, min_range);
471d86ed7fbStbbdev if (extra_debug) {
472d86ed7fbStbbdev printf("Data range: ");
473d86ed7fbStbbdev remark_SOM_element(min_range);
474d86ed7fbStbbdev printf(" to ");
475d86ed7fbStbbdev remark_SOM_element(max_range);
476d86ed7fbStbbdev printf("\n");
477d86ed7fbStbbdev }
478d86ed7fbStbbdev
479d86ed7fbStbbdev // find how much time is taken for the single function_node case.
480d86ed7fbStbbdev // adjust nPasses so the 1x1 time is somewhere around serial_time_adjust seconds.
481d86ed7fbStbbdev // make sure the example test runs for at least 0.5 second.
482d86ed7fbStbbdev for (;;) {
483d86ed7fbStbbdev // Restrict max concurrency level via task_arena interface
484d86ed7fbStbbdev oneapi::tbb::task_arena ta(1);
485d86ed7fbStbbdev SOMap map1(xMax, yMax);
486d86ed7fbStbbdev speculation_start = nPasses + 1; // Don't speculate
487d86ed7fbStbbdev
488d86ed7fbStbbdev xranges = 1;
489d86ed7fbStbbdev yranges = 1;
490d86ed7fbStbbdev map1.initialize(InitializeGradient, max_range, min_range);
491d86ed7fbStbbdev oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
492d86ed7fbStbbdev graph_teach(map1, my_teaching, ta);
493d86ed7fbStbbdev oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
494d86ed7fbStbbdev double nSeconds = (t1 - t0).seconds();
495d86ed7fbStbbdev if (nSeconds < 0.5) {
496d86ed7fbStbbdev xMax *= 2;
497d86ed7fbStbbdev yMax *= 2;
498d86ed7fbStbbdev continue;
499d86ed7fbStbbdev }
500d86ed7fbStbbdev double size_adjust = sqrt(serial_time_adjust / nSeconds);
501d86ed7fbStbbdev xMax = (int)((double)xMax * size_adjust);
502d86ed7fbStbbdev yMax = (int)((double)yMax * size_adjust);
503d86ed7fbStbbdev max_radius = (xMax < yMax) ? yMax / 2 : xMax / 2;
504d86ed7fbStbbdev radius_decay_rate = log((double)max_radius) / (double)nPasses;
505d86ed7fbStbbdev
506d86ed7fbStbbdev if (extra_debug) {
507d86ed7fbStbbdev printf("original 1x1 case ran in %g seconds\n", nSeconds);
508d86ed7fbStbbdev printf(" Size of table == %d x %d\n", xMax, yMax);
509d86ed7fbStbbdev printf(" radius_decay_rate == %g\n", radius_decay_rate);
510d86ed7fbStbbdev }
511d86ed7fbStbbdev break;
512d86ed7fbStbbdev }
513d86ed7fbStbbdev
514d86ed7fbStbbdev // the "max_radius" starts at 1/2*radius_fraction the table size. To start the speculation when the radius is
515d86ed7fbStbbdev // 1 / n * the table size, the constant in the log below should be n / 2. so 2 == 1/4, 3 == 1/6th,
516d86ed7fbStbbdev // et c.
517d86ed7fbStbbdev if (dont_speculate) {
518d86ed7fbStbbdev l_speculation_start = nPasses + 1;
519d86ed7fbStbbdev if (extra_debug)
520d86ed7fbStbbdev printf("speculation will not be done\n");
521d86ed7fbStbbdev }
522d86ed7fbStbbdev else {
523d86ed7fbStbbdev if (radius_fraction < 1.0) {
524d86ed7fbStbbdev if (extra_debug)
525d86ed7fbStbbdev printf("Warning: radius_fraction should be >= 1. Setting to 1.\n");
526d86ed7fbStbbdev radius_fraction = 1.0;
527d86ed7fbStbbdev }
528d86ed7fbStbbdev l_speculation_start = (int)((double)nPasses * log(radius_fraction) / log((double)nPasses));
529d86ed7fbStbbdev if (extra_debug)
530d86ed7fbStbbdev printf("We will start speculation at iteration %d\n", l_speculation_start);
531d86ed7fbStbbdev }
532d86ed7fbStbbdev double single_time; // for speedup calculations
533d86ed7fbStbbdev #if EXTRA_DEBUG
534d86ed7fbStbbdev // storage for the single-subrange answers, for comparing maps
535d86ed7fbStbbdev std::vector<double> single_dist;
536d86ed7fbStbbdev single_dist.reserve(my_teaching.size());
537d86ed7fbStbbdev std::vector<int> single_xval;
538d86ed7fbStbbdev single_xval.reserve(my_teaching.size());
539d86ed7fbStbbdev std::vector<int> single_yval;
540d86ed7fbStbbdev single_yval.reserve(my_teaching.size());
541d86ed7fbStbbdev #endif
542*e1b24895SIvan Kochin //TODO: Investigate how to not require mandatory concurrency
543*e1b24895SIvan Kochin for (int p = std::max(threads.first, 2); p <= std::max(threads.last, 2); ++p) {
544d86ed7fbStbbdev // Restrict max concurrency level via task_arena interface
545*e1b24895SIvan Kochin oneapi::tbb::global_control limit(oneapi::tbb::global_control::max_allowed_parallelism, p);
546d86ed7fbStbbdev oneapi::tbb::task_arena ta(p);
547d86ed7fbStbbdev if (extra_debug)
548d86ed7fbStbbdev printf(" -------------- Running with %d threads. ------------\n", p);
549d86ed7fbStbbdev // run the SOM build for a series of subranges
550d86ed7fbStbbdev for (xranges = 1; xranges <= xRangeMax; ++xranges) {
551d86ed7fbStbbdev for (yranges = xranges; yranges <= yRangeMax; ++yranges) {
552d86ed7fbStbbdev if (xranges == 1 && yranges == 1) {
553d86ed7fbStbbdev // don't pointlessly speculate if we're only running one subrange.
554d86ed7fbStbbdev speculation_start = nPasses + 1;
555d86ed7fbStbbdev }
556d86ed7fbStbbdev else {
557d86ed7fbStbbdev speculation_start = l_speculation_start;
558d86ed7fbStbbdev }
559d86ed7fbStbbdev SOMap map1(xMax, yMax);
560d86ed7fbStbbdev map1.initialize(InitializeGradient, max_range, min_range);
561d86ed7fbStbbdev
562d86ed7fbStbbdev if (extra_debug)
563d86ed7fbStbbdev printf("Start learning for [%d,%d] ----------- \n", xranges, yranges);
564d86ed7fbStbbdev oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
565d86ed7fbStbbdev graph_teach(map1, my_teaching, ta);
566d86ed7fbStbbdev oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
567d86ed7fbStbbdev
568d86ed7fbStbbdev if (extra_debug)
569d86ed7fbStbbdev printf("Done learning for [%d,%d], which took %g seconds ",
570d86ed7fbStbbdev xranges,
571d86ed7fbStbbdev yranges,
572d86ed7fbStbbdev (t1 - t0).seconds());
573d86ed7fbStbbdev if (xranges == 1 && yranges == 1)
574d86ed7fbStbbdev single_time = (t1 - t0).seconds();
575d86ed7fbStbbdev if (extra_debug)
576d86ed7fbStbbdev printf(": speedup == %g\n", single_time / (t1 - t0).seconds());
577d86ed7fbStbbdev
578d86ed7fbStbbdev #if EXTRA_DEBUG
579d86ed7fbStbbdev if (extra_debug) {
580d86ed7fbStbbdev // number of times cancel was called, indexed by number of subranges canceled
581d86ed7fbStbbdev for (int i = 0; i < cancel_count.size(); ++i) {
582d86ed7fbStbbdev // only write output if we have a non-zero value.
583d86ed7fbStbbdev if (cancel_count[i] > 0) {
584d86ed7fbStbbdev int totalcnt = 0;
585d86ed7fbStbbdev printf(" cancellations: ");
586d86ed7fbStbbdev for (int j = 0; j < cancel_count.size(); ++j) {
587d86ed7fbStbbdev if (cancel_count[j]) {
588d86ed7fbStbbdev printf(" %d [%d]", j, cancel_count[j]);
589d86ed7fbStbbdev totalcnt += cancel_count[j];
590d86ed7fbStbbdev }
591d86ed7fbStbbdev }
592d86ed7fbStbbdev totalcnt += speculation_start;
593d86ed7fbStbbdev printf(" for a total of %d\n", totalcnt);
594d86ed7fbStbbdev break; // from for
595d86ed7fbStbbdev }
596d86ed7fbStbbdev }
597d86ed7fbStbbdev
598d86ed7fbStbbdev // number of extra results (these occur when the subrange task starts before
599d86ed7fbStbbdev // cancel is received.)
600d86ed7fbStbbdev for (int i = 0; i < extra_count.size(); ++i) {
601d86ed7fbStbbdev if (extra_count[i] > 0) {
602d86ed7fbStbbdev int totalcnt = 0;
603d86ed7fbStbbdev printf("extra computations: ");
604d86ed7fbStbbdev for (int j = 0; j < extra_count.size(); ++j) {
605d86ed7fbStbbdev if (extra_count[j]) {
606d86ed7fbStbbdev printf(" %d[%d]", j, extra_count[j]);
607d86ed7fbStbbdev totalcnt += extra_count[j];
608d86ed7fbStbbdev }
609d86ed7fbStbbdev }
610d86ed7fbStbbdev totalcnt += speculation_start;
611d86ed7fbStbbdev printf(" for a total of %d\n", totalcnt);
612d86ed7fbStbbdev break; // from for
613d86ed7fbStbbdev }
614d86ed7fbStbbdev }
615d86ed7fbStbbdev
616d86ed7fbStbbdev // here we count the number of times we looked for a particular subrange when fetching
617d86ed7fbStbbdev // the queue_node output and didn't find anything. This may occur when a function_node
618d86ed7fbStbbdev // is "stuck" and doesn't process some number of exemplars. function_node_execs is
619d86ed7fbStbbdev // a count of the number of times the corresponding function_node was executed (in
620d86ed7fbStbbdev // case the problem is dropped output in the queue_node.)
621d86ed7fbStbbdev for (int i = 0; i < missing_count.size(); ++i) {
622d86ed7fbStbbdev if (missing_count[i]) {
623d86ed7fbStbbdev int xval = i / yranges;
624d86ed7fbStbbdev int yval = i % yranges;
625d86ed7fbStbbdev printf(" f_node[%d,%d] missed %d values", xval, yval, missing_count[i]);
626d86ed7fbStbbdev if (canceled_before[i]) {
627d86ed7fbStbbdev printf(" canceled_before == %d", canceled_before[i]);
628d86ed7fbStbbdev }
629d86ed7fbStbbdev printf(", fn_tally == %d\n", function_node_execs[i]);
630d86ed7fbStbbdev }
631d86ed7fbStbbdev }
632d86ed7fbStbbdev }
633d86ed7fbStbbdev
634d86ed7fbStbbdev // check that output matches the 1x1 case
635d86ed7fbStbbdev for (int i = 0; i < my_teaching.size(); ++i) {
636d86ed7fbStbbdev int xdist;
637d86ed7fbStbbdev int ydist;
638d86ed7fbStbbdev double my_dist = map1.BMU(my_teaching[i], xdist, ydist);
639d86ed7fbStbbdev if (xranges == 1 && yranges == 1) {
640d86ed7fbStbbdev single_dist.push_back(my_dist);
641d86ed7fbStbbdev single_xval.push_back(xdist);
642d86ed7fbStbbdev single_yval.push_back(ydist);
643d86ed7fbStbbdev }
644d86ed7fbStbbdev else {
645d86ed7fbStbbdev if (single_dist[i] != my_dist || single_xval[i] != xdist ||
646d86ed7fbStbbdev single_yval[i] != ydist)
647d86ed7fbStbbdev printf(
648d86ed7fbStbbdev "Error in output: expecting <%g, %d, %d>, but got <%g, %d, %d>\n",
649d86ed7fbStbbdev single_dist[i],
650d86ed7fbStbbdev single_xval[i],
651d86ed7fbStbbdev single_yval[i],
652d86ed7fbStbbdev my_dist,
653d86ed7fbStbbdev xdist,
654d86ed7fbStbbdev ydist);
655d86ed7fbStbbdev }
656d86ed7fbStbbdev }
657d86ed7fbStbbdev #endif
658d86ed7fbStbbdev } // yranges
659d86ed7fbStbbdev } // xranges
660d86ed7fbStbbdev } // #threads p
661d86ed7fbStbbdev printf("done\n");
662d86ed7fbStbbdev return 0;
663d86ed7fbStbbdev }
664