1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev 
4d86ed7fbStbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev     you may not use this file except in compliance with the License.
6d86ed7fbStbbdev     You may obtain a copy of the License at
7d86ed7fbStbbdev 
8d86ed7fbStbbdev         http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev 
10d86ed7fbStbbdev     Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev     See the License for the specific language governing permissions and
14d86ed7fbStbbdev     limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev 
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev     The original source for this example is
19d86ed7fbStbbdev     Copyright (c) 1994-2008 John E. Stone
20d86ed7fbStbbdev     All rights reserved.
21d86ed7fbStbbdev 
22d86ed7fbStbbdev     Redistribution and use in source and binary forms, with or without
23d86ed7fbStbbdev     modification, are permitted provided that the following conditions
24d86ed7fbStbbdev     are met:
25d86ed7fbStbbdev     1. Redistributions of source code must retain the above copyright
26d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer.
27d86ed7fbStbbdev     2. Redistributions in binary form must reproduce the above copyright
28d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer in the
29d86ed7fbStbbdev        documentation and/or other materials provided with the distribution.
30d86ed7fbStbbdev     3. The name of the author may not be used to endorse or promote products
31d86ed7fbStbbdev        derived from this software without specific prior written permission.
32d86ed7fbStbbdev 
33d86ed7fbStbbdev     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34d86ed7fbStbbdev     OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35d86ed7fbStbbdev     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36d86ed7fbStbbdev     ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37d86ed7fbStbbdev     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38d86ed7fbStbbdev     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39d86ed7fbStbbdev     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40d86ed7fbStbbdev     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41d86ed7fbStbbdev     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42d86ed7fbStbbdev     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43d86ed7fbStbbdev     SUCH DAMAGE.
44d86ed7fbStbbdev */
45d86ed7fbStbbdev 
46d86ed7fbStbbdev #include "machine.hpp"
47d86ed7fbStbbdev #include "types.hpp"
48d86ed7fbStbbdev #include "macros.hpp"
49d86ed7fbStbbdev #include "vector.hpp"
50d86ed7fbStbbdev #include "tgafile.hpp"
51d86ed7fbStbbdev #include "trace.hpp"
52d86ed7fbStbbdev #include "light.hpp"
53d86ed7fbStbbdev #include "shade.hpp"
54d86ed7fbStbbdev #include "camera.hpp"
55d86ed7fbStbbdev #include "util.hpp"
56d86ed7fbStbbdev #include "intersect.hpp"
57d86ed7fbStbbdev #include "global.hpp"
58d86ed7fbStbbdev #include "ui.hpp"
59d86ed7fbStbbdev #include "tachyon_video.hpp"
60d86ed7fbStbbdev 
61d86ed7fbStbbdev // shared but read-only so could be private too
62d86ed7fbStbbdev static thr_parms *all_parms;
63d86ed7fbStbbdev static scenedef scene;
64d86ed7fbStbbdev static int startx;
65d86ed7fbStbbdev static int stopx;
66d86ed7fbStbbdev static int starty;
67d86ed7fbStbbdev static int stopy;
68d86ed7fbStbbdev static flt jitterscale;
69d86ed7fbStbbdev static int totaly;
70d86ed7fbStbbdev 
#ifdef MARK_RENDERING_AREA

// Blend weights used when tinting a rendered pixel with its thread's color:
// border_alpha on the outline of a tile, inner_alpha everywhere else.
static const float inner_alpha = 0.3;
static const float border_alpha = 0.5;

// RGB palette; a thread draws with colors[thread_id % NUM_COLORS].
#define NUM_COLORS 24
static int colors[NUM_COLORS][3] = {
    { 255, 110, 0 },   { 220, 254, 0 },   { 102, 254, 0 },   { 0, 21, 254 },
    { 97, 0, 254 },    { 254, 30, 0 },    { 20, 41, 8 },     { 144, 238, 38 },
    { 184, 214, 139 }, { 28, 95, 20 },    { 139, 173, 148 }, { 188, 228, 183 },
    { 145, 47, 56 },   { 204, 147, 193 }, { 45, 202, 143 },  { 204, 171, 143 },
    { 143, 160, 204 }, { 220, 173, 3 },   { 1, 152, 231 },   { 79, 235, 237 },
    { 52, 193, 72 },   { 67, 136, 151 },  { 78, 87, 179 },   { 143, 255, 9 },
};

#include <atomic>
#include "oneapi/tbb/enumerable_thread_specific.h"

// Per-thread slot holding the number assigned to that thread; -1 means the
// thread has not been numbered yet. Numbers are handed out from
// thread_number in the order threads run their first task.
using thread_id_t = oneapi::tbb::enumerable_thread_specific<int>;
thread_id_t thread_ids(-1);
std::atomic<int> thread_number;

#endif
93d86ed7fbStbbdev 
94d86ed7fbStbbdev #include "oneapi/tbb/parallel_for.h"
95d86ed7fbStbbdev #include "oneapi/tbb/spin_mutex.h"
96d86ed7fbStbbdev #include "oneapi/tbb/blocked_range2d.h"
97d86ed7fbStbbdev #include "oneapi/tbb/global_control.h"
98d86ed7fbStbbdev #include "common/utility/get_default_num_threads.hpp"
99d86ed7fbStbbdev 
100d86ed7fbStbbdev static oneapi::tbb::spin_mutex MyMutex, MyMutex2;
101d86ed7fbStbbdev 
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy,int * blend,float alpha)102d86ed7fbStbbdev static color_t render_one_pixel(int x,
103d86ed7fbStbbdev                                 int y,
104d86ed7fbStbbdev                                 unsigned int *local_mbox,
105d86ed7fbStbbdev                                 unsigned int &serial,
106d86ed7fbStbbdev                                 int startx,
107d86ed7fbStbbdev                                 int stopx,
108d86ed7fbStbbdev                                 int starty,
109d86ed7fbStbbdev                                 int stopy
110d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
111d86ed7fbStbbdev                                 ,
112d86ed7fbStbbdev                                 int *blend,
113d86ed7fbStbbdev                                 float alpha
114d86ed7fbStbbdev #endif
115d86ed7fbStbbdev ) {
116d86ed7fbStbbdev     /* private vars moved inside loop */
117d86ed7fbStbbdev     ray primary, sample;
118d86ed7fbStbbdev     color col, avcol;
119d86ed7fbStbbdev     int R, G, B;
120d86ed7fbStbbdev     intersectstruct local_intersections;
121d86ed7fbStbbdev     int alias;
122d86ed7fbStbbdev     /* end private */
123d86ed7fbStbbdev 
124d86ed7fbStbbdev     primary = camray(&scene, x, y);
125d86ed7fbStbbdev     primary.intstruct = &local_intersections;
126d86ed7fbStbbdev     primary.flags = RT_RAY_REGULAR;
127d86ed7fbStbbdev 
128d86ed7fbStbbdev     serial++;
129d86ed7fbStbbdev     primary.serial = serial;
130d86ed7fbStbbdev     primary.mbox = local_mbox;
131d86ed7fbStbbdev     primary.maxdist = FHUGE;
132d86ed7fbStbbdev     primary.scene = &scene;
133d86ed7fbStbbdev     col = trace(&primary);
134d86ed7fbStbbdev 
135d86ed7fbStbbdev     serial = primary.serial;
136d86ed7fbStbbdev 
137d86ed7fbStbbdev     /* perform antialiasing if enabled.. */
138d86ed7fbStbbdev     if (scene.antialiasing > 0) {
139d86ed7fbStbbdev         for (alias = 0; alias < scene.antialiasing; alias++) {
140d86ed7fbStbbdev             serial++; /* increment serial number */
141d86ed7fbStbbdev             sample = primary; /* copy the regular primary ray to start with */
142d86ed7fbStbbdev             sample.serial = serial;
143d86ed7fbStbbdev 
144d86ed7fbStbbdev             {
145d86ed7fbStbbdev                 oneapi::tbb::spin_mutex::scoped_lock lock(MyMutex);
146d86ed7fbStbbdev                 sample.d.x += ((rand() % 100) - 50) / jitterscale;
147d86ed7fbStbbdev                 sample.d.y += ((rand() % 100) - 50) / jitterscale;
148d86ed7fbStbbdev                 sample.d.z += ((rand() % 100) - 50) / jitterscale;
149d86ed7fbStbbdev             }
150d86ed7fbStbbdev 
151d86ed7fbStbbdev             avcol = trace(&sample);
152d86ed7fbStbbdev 
153d86ed7fbStbbdev             serial = sample.serial; /* update our overall serial # */
154d86ed7fbStbbdev 
155d86ed7fbStbbdev             col.r += avcol.r;
156d86ed7fbStbbdev             col.g += avcol.g;
157d86ed7fbStbbdev             col.b += avcol.b;
158d86ed7fbStbbdev         }
159d86ed7fbStbbdev 
160d86ed7fbStbbdev         col.r /= (scene.antialiasing + 1.0);
161d86ed7fbStbbdev         col.g /= (scene.antialiasing + 1.0);
162d86ed7fbStbbdev         col.b /= (scene.antialiasing + 1.0);
163d86ed7fbStbbdev     }
164d86ed7fbStbbdev 
165d86ed7fbStbbdev     /* Handle overexposure and underexposure here... */
166d86ed7fbStbbdev     R = (int)(col.r * 255);
167d86ed7fbStbbdev     if (R > 255)
168d86ed7fbStbbdev         R = 255;
169d86ed7fbStbbdev     else if (R < 0)
170d86ed7fbStbbdev         R = 0;
171d86ed7fbStbbdev 
172d86ed7fbStbbdev     G = (int)(col.g * 255);
173d86ed7fbStbbdev     if (G > 255)
174d86ed7fbStbbdev         G = 255;
175d86ed7fbStbbdev     else if (G < 0)
176d86ed7fbStbbdev         G = 0;
177d86ed7fbStbbdev 
178d86ed7fbStbbdev     B = (int)(col.b * 255);
179d86ed7fbStbbdev     if (B > 255)
180d86ed7fbStbbdev         B = 255;
181d86ed7fbStbbdev     else if (B < 0)
182d86ed7fbStbbdev         B = 0;
183d86ed7fbStbbdev 
184d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
185d86ed7fbStbbdev     R = int((1.0 - alpha) * R + alpha * blend[0]);
186d86ed7fbStbbdev     G = int((1.0 - alpha) * G + alpha * blend[1]);
187d86ed7fbStbbdev     B = int((1.0 - alpha) * B + alpha * blend[2]);
188d86ed7fbStbbdev #endif
189d86ed7fbStbbdev 
190d86ed7fbStbbdev     return video->get_color(R, G, B);
191d86ed7fbStbbdev }
192d86ed7fbStbbdev 
193d86ed7fbStbbdev class parallel_task {
194d86ed7fbStbbdev public:
operator ()(const oneapi::tbb::blocked_range2d<int> & r) const195d86ed7fbStbbdev     void operator()(const oneapi::tbb::blocked_range2d<int> &r) const {
196d86ed7fbStbbdev         // task-local storage
197d86ed7fbStbbdev         unsigned int serial = 1;
198d86ed7fbStbbdev         unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
199d86ed7fbStbbdev         unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
200d86ed7fbStbbdev         memset(local_mbox, 0, mboxsize);
201d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
202d86ed7fbStbbdev         // compute thread number while first task run
203d86ed7fbStbbdev         thread_id_t::reference thread_id = thread_ids.local();
204d86ed7fbStbbdev         if (thread_id == -1)
205d86ed7fbStbbdev             thread_id = thread_number++;
206d86ed7fbStbbdev         // choose thread color
207d86ed7fbStbbdev         int pos = thread_id % NUM_COLORS;
208d86ed7fbStbbdev         if (video->running) {
209d86ed7fbStbbdev             drawing_area drawing(r.cols().begin(),
210d86ed7fbStbbdev                                  totaly - r.rows().end(),
211d86ed7fbStbbdev                                  r.cols().end() - r.cols().begin(),
212d86ed7fbStbbdev                                  r.rows().end() - r.rows().begin());
213d86ed7fbStbbdev             for (int i = 1, y = r.rows().begin(); y != r.rows().end(); ++y, i++) {
214d86ed7fbStbbdev                 drawing.set_pos(0, drawing.size_y - i);
215d86ed7fbStbbdev                 for (int x = r.cols().begin(); x != r.cols().end(); x++) {
216d86ed7fbStbbdev                     int d = (y % 3 == 0) ? 2 : 1;
217d86ed7fbStbbdev                     drawing.put_pixel(video->get_color(
218d86ed7fbStbbdev                         colors[pos][0] / d, colors[pos][1] / d, colors[pos][2] / d));
219d86ed7fbStbbdev                 }
220d86ed7fbStbbdev             }
221d86ed7fbStbbdev         }
222d86ed7fbStbbdev #endif
223d86ed7fbStbbdev         if (video->next_frame()) {
224d86ed7fbStbbdev             drawing_area drawing(r.cols().begin(),
225d86ed7fbStbbdev                                  totaly - r.rows().end(),
226d86ed7fbStbbdev                                  r.cols().end() - r.cols().begin(),
227d86ed7fbStbbdev                                  r.rows().end() - r.rows().begin());
228d86ed7fbStbbdev             for (int i = 1, y = r.rows().begin(); y != r.rows().end(); ++y, i++) {
229d86ed7fbStbbdev                 drawing.set_pos(0, drawing.size_y - i);
230d86ed7fbStbbdev                 for (int x = r.cols().begin(); x != r.cols().end(); x++) {
231d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
232d86ed7fbStbbdev                     float alpha = y == r.rows().begin() || y == r.rows().end() - 1 ||
233d86ed7fbStbbdev                                           x == r.cols().begin() || x == r.cols().end() - 1
234d86ed7fbStbbdev                                       ? border_alpha
235d86ed7fbStbbdev                                       : inner_alpha;
236d86ed7fbStbbdev                     color_t c = render_one_pixel(
237d86ed7fbStbbdev                         x, y, local_mbox, serial, startx, stopx, starty, stopy, colors[pos], alpha);
238d86ed7fbStbbdev #else
239d86ed7fbStbbdev                     color_t c =
240d86ed7fbStbbdev                         render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
241d86ed7fbStbbdev #endif
242d86ed7fbStbbdev                     drawing.put_pixel(c);
243d86ed7fbStbbdev                 }
244d86ed7fbStbbdev             }
245d86ed7fbStbbdev         }
246d86ed7fbStbbdev     }
247d86ed7fbStbbdev 
parallel_task()248d86ed7fbStbbdev     parallel_task() {}
249d86ed7fbStbbdev };
250d86ed7fbStbbdev 
thread_trace(thr_parms * parms)251d86ed7fbStbbdev void *thread_trace(thr_parms *parms) {
252d86ed7fbStbbdev #if !WIN8UI_EXAMPLE
253d86ed7fbStbbdev     int n, nthreads = utility::get_default_num_threads();
254d86ed7fbStbbdev     char *nthreads_str = getenv("TBB_NUM_THREADS");
255d86ed7fbStbbdev     if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
256d86ed7fbStbbdev         nthreads = n;
257d86ed7fbStbbdev     oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism, nthreads);
258d86ed7fbStbbdev #endif
259d86ed7fbStbbdev 
260d86ed7fbStbbdev     // shared but read-only so could be private too
261d86ed7fbStbbdev     all_parms = parms;
262d86ed7fbStbbdev     scene = parms->scene;
263d86ed7fbStbbdev     startx = parms->startx;
264d86ed7fbStbbdev     stopx = parms->stopx;
265d86ed7fbStbbdev     starty = parms->starty;
266d86ed7fbStbbdev     stopy = parms->stopy;
267d86ed7fbStbbdev     jitterscale = 40.0 * (scene.hres + scene.vres);
268d86ed7fbStbbdev     totaly = parms->scene.vres;
269d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
270d86ed7fbStbbdev     thread_ids.clear();
271d86ed7fbStbbdev #endif
272d86ed7fbStbbdev 
273d86ed7fbStbbdev     int grain_size = 8;
274d86ed7fbStbbdev //WIN8UI does not support getenv() function so using auto_partitioner unconditionally
275d86ed7fbStbbdev #if !WIN8UI_EXAMPLE
276d86ed7fbStbbdev     int g;
277d86ed7fbStbbdev     char *grain_str = getenv("TBB_GRAINSIZE");
278d86ed7fbStbbdev     if (grain_str && (sscanf(grain_str, "%d", &g) > 0) && (g > 0))
279d86ed7fbStbbdev         grain_size = g;
280d86ed7fbStbbdev     char *sched_str = getenv("TBB_PARTITIONER");
281d86ed7fbStbbdev     static oneapi::tbb::affinity_partitioner g_ap; // reused across calls to thread_trace
282d86ed7fbStbbdev     if (sched_str && !strncmp(sched_str, "aff", 3))
283d86ed7fbStbbdev         oneapi::tbb::parallel_for(
284d86ed7fbStbbdev             oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
285d86ed7fbStbbdev             parallel_task(),
286d86ed7fbStbbdev             g_ap);
287d86ed7fbStbbdev     else if (sched_str && !strncmp(sched_str, "simp", 4))
288d86ed7fbStbbdev         oneapi::tbb::parallel_for(
289d86ed7fbStbbdev             oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
290d86ed7fbStbbdev             parallel_task(),
291d86ed7fbStbbdev             oneapi::tbb::simple_partitioner());
292d86ed7fbStbbdev     else
293d86ed7fbStbbdev #endif
294d86ed7fbStbbdev         oneapi::tbb::parallel_for(
295d86ed7fbStbbdev             oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
296d86ed7fbStbbdev             parallel_task(),
297d86ed7fbStbbdev             oneapi::tbb::auto_partitioner());
298d86ed7fbStbbdev 
299d86ed7fbStbbdev     return (nullptr);
300d86ed7fbStbbdev }
301