1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 /*
18 The original source for this example is
19 Copyright (c) 1994-2008 John E. Stone
20 All rights reserved.
21
22 Redistribution and use in source and binary forms, with or without
23 modification, are permitted provided that the following conditions
24 are met:
25 1. Redistributions of source code must retain the above copyright
26 notice, this list of conditions and the following disclaimer.
27 2. Redistributions in binary form must reproduce the above copyright
28 notice, this list of conditions and the following disclaimer in the
29 documentation and/or other materials provided with the distribution.
30 3. The name of the author may not be used to endorse or promote products
31 derived from this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 SUCH DAMAGE.
44 */
45
46 #include "machine.hpp"
47 #include "types.hpp"
48 #include "macros.hpp"
49 #include "vector.hpp"
50 #include "tgafile.hpp"
51 #include "trace.hpp"
52 #include "light.hpp"
53 #include "shade.hpp"
54 #include "camera.hpp"
55 #include "util.hpp"
56 #include "intersect.hpp"
57 #include "global.hpp"
58 #include "ui.hpp"
59 #include "tachyon_video.hpp"
60
// Render-wide state. thread_trace() assigns every one of these from its
// thr_parms argument before it starts the parallel loop; the tasks then only
// read them (shared but read-only so could be private too).
static thr_parms *all_parms; // the parms pointer given to the current thread_trace() call
static scenedef scene; // copy of parms->scene; rays are generated and traced against it
static int startx; // parms->startx: begin of the column (x) range handed to parallel_for
static int stopx; // parms->stopx: end of the column (x) range
static int starty; // parms->starty: begin of the row (y) range
static int stopy; // parms->stopy: end of the row (y) range
static flt jitterscale; // 40.0 * (scene.hres + scene.vres); divisor for the antialiasing jitter
static int totaly; // parms->scene.vres; tasks use totaly - row_end as a drawing_area origin
70
#ifdef MARK_RENDERING_AREA

// rgb colors list for coloring image by each thread
// Blend weight of the thread color for pixels that are not on the edge of a
// task's range.
static const float inner_alpha = 0.3;
// Blend weight of the thread color for pixels on the first/last row or column
// of a task's range.
static const float border_alpha = 0.5;
// Number of entries in colors[]; thread numbers are reduced modulo this value.
#define NUM_COLORS 24
// One { R, G, B } triple per entry.
static int colors[NUM_COLORS][3] = {
    { 255, 110, 0 }, { 220, 254, 0 }, { 102, 254, 0 }, { 0, 21, 254 }, { 97, 0, 254 },
    { 254, 30, 0 }, { 20, 41, 8 }, { 144, 238, 38 }, { 184, 214, 139 }, { 28, 95, 20 },
    { 139, 173, 148 }, { 188, 228, 183 }, { 145, 47, 56 }, { 204, 147, 193 }, { 45, 202, 143 },
    { 204, 171, 143 }, { 143, 160, 204 }, { 220, 173, 3 }, { 1, 152, 231 }, { 79, 235, 237 },
    { 52, 193, 72 }, { 67, 136, 151 }, { 78, 87, 179 }, { 143, 255, 9 },
};

#include <atomic>
#include "oneapi/tbb/enumerable_thread_specific.h"
// storage and counter for thread numbers in order of first task run
typedef oneapi::tbb::enumerable_thread_specific<int> thread_id_t;
// Per-thread number; the initial value -1 means "not numbered yet".
thread_id_t thread_ids(-1);
// Next number to hand out; post-incremented by the first task a thread runs.
std::atomic<int> thread_number;

#endif
93
94 #include "oneapi/tbb/parallel_for.h"
95 #include "oneapi/tbb/spin_mutex.h"
96 #include "oneapi/tbb/blocked_range2d.h"
97 #include "oneapi/tbb/global_control.h"
98 #include "common/utility/get_default_num_threads.hpp"
99
// MyMutex serializes the rand() calls made for antialiasing jitter in
// render_one_pixel(). MyMutex2 is not used by any code visible in this part
// of the file.
static oneapi::tbb::spin_mutex MyMutex, MyMutex2;
101
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy,int * blend,float alpha)102 static color_t render_one_pixel(int x,
103 int y,
104 unsigned int *local_mbox,
105 unsigned int &serial,
106 int startx,
107 int stopx,
108 int starty,
109 int stopy
110 #ifdef MARK_RENDERING_AREA
111 ,
112 int *blend,
113 float alpha
114 #endif
115 ) {
116 /* private vars moved inside loop */
117 ray primary, sample;
118 color col, avcol;
119 int R, G, B;
120 intersectstruct local_intersections;
121 int alias;
122 /* end private */
123
124 primary = camray(&scene, x, y);
125 primary.intstruct = &local_intersections;
126 primary.flags = RT_RAY_REGULAR;
127
128 serial++;
129 primary.serial = serial;
130 primary.mbox = local_mbox;
131 primary.maxdist = FHUGE;
132 primary.scene = &scene;
133 col = trace(&primary);
134
135 serial = primary.serial;
136
137 /* perform antialiasing if enabled.. */
138 if (scene.antialiasing > 0) {
139 for (alias = 0; alias < scene.antialiasing; alias++) {
140 serial++; /* increment serial number */
141 sample = primary; /* copy the regular primary ray to start with */
142 sample.serial = serial;
143
144 {
145 oneapi::tbb::spin_mutex::scoped_lock lock(MyMutex);
146 sample.d.x += ((rand() % 100) - 50) / jitterscale;
147 sample.d.y += ((rand() % 100) - 50) / jitterscale;
148 sample.d.z += ((rand() % 100) - 50) / jitterscale;
149 }
150
151 avcol = trace(&sample);
152
153 serial = sample.serial; /* update our overall serial # */
154
155 col.r += avcol.r;
156 col.g += avcol.g;
157 col.b += avcol.b;
158 }
159
160 col.r /= (scene.antialiasing + 1.0);
161 col.g /= (scene.antialiasing + 1.0);
162 col.b /= (scene.antialiasing + 1.0);
163 }
164
165 /* Handle overexposure and underexposure here... */
166 R = (int)(col.r * 255);
167 if (R > 255)
168 R = 255;
169 else if (R < 0)
170 R = 0;
171
172 G = (int)(col.g * 255);
173 if (G > 255)
174 G = 255;
175 else if (G < 0)
176 G = 0;
177
178 B = (int)(col.b * 255);
179 if (B > 255)
180 B = 255;
181 else if (B < 0)
182 B = 0;
183
184 #ifdef MARK_RENDERING_AREA
185 R = int((1.0 - alpha) * R + alpha * blend[0]);
186 G = int((1.0 - alpha) * G + alpha * blend[1]);
187 B = int((1.0 - alpha) * B + alpha * blend[2]);
188 #endif
189
190 return video->get_color(R, G, B);
191 }
192
193 class parallel_task {
194 public:
operator ()(const oneapi::tbb::blocked_range2d<int> & r) const195 void operator()(const oneapi::tbb::blocked_range2d<int> &r) const {
196 // task-local storage
197 unsigned int serial = 1;
198 unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
199 unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
200 memset(local_mbox, 0, mboxsize);
201 #ifdef MARK_RENDERING_AREA
202 // compute thread number while first task run
203 thread_id_t::reference thread_id = thread_ids.local();
204 if (thread_id == -1)
205 thread_id = thread_number++;
206 // choose thread color
207 int pos = thread_id % NUM_COLORS;
208 if (video->running) {
209 drawing_area drawing(r.cols().begin(),
210 totaly - r.rows().end(),
211 r.cols().end() - r.cols().begin(),
212 r.rows().end() - r.rows().begin());
213 for (int i = 1, y = r.rows().begin(); y != r.rows().end(); ++y, i++) {
214 drawing.set_pos(0, drawing.size_y - i);
215 for (int x = r.cols().begin(); x != r.cols().end(); x++) {
216 int d = (y % 3 == 0) ? 2 : 1;
217 drawing.put_pixel(video->get_color(
218 colors[pos][0] / d, colors[pos][1] / d, colors[pos][2] / d));
219 }
220 }
221 }
222 #endif
223 if (video->next_frame()) {
224 drawing_area drawing(r.cols().begin(),
225 totaly - r.rows().end(),
226 r.cols().end() - r.cols().begin(),
227 r.rows().end() - r.rows().begin());
228 for (int i = 1, y = r.rows().begin(); y != r.rows().end(); ++y, i++) {
229 drawing.set_pos(0, drawing.size_y - i);
230 for (int x = r.cols().begin(); x != r.cols().end(); x++) {
231 #ifdef MARK_RENDERING_AREA
232 float alpha = y == r.rows().begin() || y == r.rows().end() - 1 ||
233 x == r.cols().begin() || x == r.cols().end() - 1
234 ? border_alpha
235 : inner_alpha;
236 color_t c = render_one_pixel(
237 x, y, local_mbox, serial, startx, stopx, starty, stopy, colors[pos], alpha);
238 #else
239 color_t c =
240 render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
241 #endif
242 drawing.put_pixel(c);
243 }
244 }
245 }
246 }
247
parallel_task()248 parallel_task() {}
249 };
250
thread_trace(thr_parms * parms)251 void *thread_trace(thr_parms *parms) {
252 #if !WIN8UI_EXAMPLE
253 int n, nthreads = utility::get_default_num_threads();
254 char *nthreads_str = getenv("TBB_NUM_THREADS");
255 if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
256 nthreads = n;
257 oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism, nthreads);
258 #endif
259
260 // shared but read-only so could be private too
261 all_parms = parms;
262 scene = parms->scene;
263 startx = parms->startx;
264 stopx = parms->stopx;
265 starty = parms->starty;
266 stopy = parms->stopy;
267 jitterscale = 40.0 * (scene.hres + scene.vres);
268 totaly = parms->scene.vres;
269 #ifdef MARK_RENDERING_AREA
270 thread_ids.clear();
271 #endif
272
273 int grain_size = 8;
274 //WIN8UI does not support getenv() function so using auto_partitioner unconditionally
275 #if !WIN8UI_EXAMPLE
276 int g;
277 char *grain_str = getenv("TBB_GRAINSIZE");
278 if (grain_str && (sscanf(grain_str, "%d", &g) > 0) && (g > 0))
279 grain_size = g;
280 char *sched_str = getenv("TBB_PARTITIONER");
281 static oneapi::tbb::affinity_partitioner g_ap; // reused across calls to thread_trace
282 if (sched_str && !strncmp(sched_str, "aff", 3))
283 oneapi::tbb::parallel_for(
284 oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
285 parallel_task(),
286 g_ap);
287 else if (sched_str && !strncmp(sched_str, "simp", 4))
288 oneapi::tbb::parallel_for(
289 oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
290 parallel_task(),
291 oneapi::tbb::simple_partitioner());
292 else
293 #endif
294 oneapi::tbb::parallel_for(
295 oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
296 parallel_task(),
297 oneapi::tbb::auto_partitioner());
298
299 return (nullptr);
300 }
301