1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev The original source for this example is
19d86ed7fbStbbdev Copyright (c) 1994-2008 John E. Stone
20d86ed7fbStbbdev All rights reserved.
21d86ed7fbStbbdev
22d86ed7fbStbbdev Redistribution and use in source and binary forms, with or without
23d86ed7fbStbbdev modification, are permitted provided that the following conditions
24d86ed7fbStbbdev are met:
25d86ed7fbStbbdev 1. Redistributions of source code must retain the above copyright
26d86ed7fbStbbdev notice, this list of conditions and the following disclaimer.
27d86ed7fbStbbdev 2. Redistributions in binary form must reproduce the above copyright
28d86ed7fbStbbdev notice, this list of conditions and the following disclaimer in the
29d86ed7fbStbbdev documentation and/or other materials provided with the distribution.
30d86ed7fbStbbdev 3. The name of the author may not be used to endorse or promote products
31d86ed7fbStbbdev derived from this software without specific prior written permission.
32d86ed7fbStbbdev
33d86ed7fbStbbdev THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34d86ed7fbStbbdev OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35d86ed7fbStbbdev WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36d86ed7fbStbbdev ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37d86ed7fbStbbdev DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38d86ed7fbStbbdev DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39d86ed7fbStbbdev OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40d86ed7fbStbbdev HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41d86ed7fbStbbdev LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42d86ed7fbStbbdev OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43d86ed7fbStbbdev SUCH DAMAGE.
44d86ed7fbStbbdev */
45d86ed7fbStbbdev
46d86ed7fbStbbdev #include "machine.hpp"
47d86ed7fbStbbdev #include "types.hpp"
48d86ed7fbStbbdev #include "macros.hpp"
49d86ed7fbStbbdev #include "vector.hpp"
50d86ed7fbStbbdev #include "tgafile.hpp"
51d86ed7fbStbbdev #include "trace.hpp"
52d86ed7fbStbbdev #include "light.hpp"
53d86ed7fbStbbdev #include "shade.hpp"
54d86ed7fbStbbdev #include "camera.hpp"
55d86ed7fbStbbdev #include "util.hpp"
56d86ed7fbStbbdev #include "intersect.hpp"
57d86ed7fbStbbdev #include "global.hpp"
58d86ed7fbStbbdev #include "ui.hpp"
59d86ed7fbStbbdev #include "tachyon_video.hpp"
60d86ed7fbStbbdev
61d86ed7fbStbbdev // shared but read-only so could be private too
62d86ed7fbStbbdev static thr_parms *all_parms;
63d86ed7fbStbbdev static scenedef scene;
64d86ed7fbStbbdev static int startx;
65d86ed7fbStbbdev static int stopx;
66d86ed7fbStbbdev static int starty;
67d86ed7fbStbbdev static int stopy;
68d86ed7fbStbbdev static flt jitterscale;
69d86ed7fbStbbdev static int totaly;
70d86ed7fbStbbdev
#ifdef MARK_RENDERING_AREA

// Blend weights used when tinting a pixel with its thread's color:
// border_alpha on the outermost pixels of a tile, inner_alpha everywhere else.
static const float inner_alpha = 0.3f;
static const float border_alpha = 0.5f;

// RGB palette; a thread picks entry (thread number % NUM_COLORS).
#define NUM_COLORS 24
static int colors[NUM_COLORS][3] = {
    { 255, 110, 0 },   { 220, 254, 0 },   { 102, 254, 0 },   { 0, 21, 254 },
    { 97, 0, 254 },    { 254, 30, 0 },    { 20, 41, 8 },     { 144, 238, 38 },
    { 184, 214, 139 }, { 28, 95, 20 },    { 139, 173, 148 }, { 188, 228, 183 },
    { 145, 47, 56 },   { 204, 147, 193 }, { 45, 202, 143 },  { 204, 171, 143 },
    { 143, 160, 204 }, { 220, 173, 3 },   { 1, 152, 231 },   { 79, 235, 237 },
    { 52, 193, 72 },   { 67, 136, 151 },  { 78, 87, 179 },   { 143, 255, 9 },
};

#include <atomic>
#include "oneapi/tbb/enumerable_thread_specific.h"

// Per-thread slot holding the thread's number (-1 until the thread runs its first task),
// plus the counter that hands out those numbers in order of first task run.
using thread_id_t = oneapi::tbb::enumerable_thread_specific<int>;
thread_id_t thread_ids(-1);
std::atomic<int> thread_number;

#endif
93d86ed7fbStbbdev
94d86ed7fbStbbdev #include "oneapi/tbb/parallel_for.h"
95d86ed7fbStbbdev #include "oneapi/tbb/spin_mutex.h"
96d86ed7fbStbbdev #include "oneapi/tbb/blocked_range2d.h"
97d86ed7fbStbbdev #include "oneapi/tbb/global_control.h"
98d86ed7fbStbbdev #include "common/utility/get_default_num_threads.hpp"
99d86ed7fbStbbdev
100d86ed7fbStbbdev static oneapi::tbb::spin_mutex MyMutex, MyMutex2;
101d86ed7fbStbbdev
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy,int * blend,float alpha)102d86ed7fbStbbdev static color_t render_one_pixel(int x,
103d86ed7fbStbbdev int y,
104d86ed7fbStbbdev unsigned int *local_mbox,
105d86ed7fbStbbdev unsigned int &serial,
106d86ed7fbStbbdev int startx,
107d86ed7fbStbbdev int stopx,
108d86ed7fbStbbdev int starty,
109d86ed7fbStbbdev int stopy
110d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
111d86ed7fbStbbdev ,
112d86ed7fbStbbdev int *blend,
113d86ed7fbStbbdev float alpha
114d86ed7fbStbbdev #endif
115d86ed7fbStbbdev ) {
116d86ed7fbStbbdev /* private vars moved inside loop */
117d86ed7fbStbbdev ray primary, sample;
118d86ed7fbStbbdev color col, avcol;
119d86ed7fbStbbdev int R, G, B;
120d86ed7fbStbbdev intersectstruct local_intersections;
121d86ed7fbStbbdev int alias;
122d86ed7fbStbbdev /* end private */
123d86ed7fbStbbdev
124d86ed7fbStbbdev primary = camray(&scene, x, y);
125d86ed7fbStbbdev primary.intstruct = &local_intersections;
126d86ed7fbStbbdev primary.flags = RT_RAY_REGULAR;
127d86ed7fbStbbdev
128d86ed7fbStbbdev serial++;
129d86ed7fbStbbdev primary.serial = serial;
130d86ed7fbStbbdev primary.mbox = local_mbox;
131d86ed7fbStbbdev primary.maxdist = FHUGE;
132d86ed7fbStbbdev primary.scene = &scene;
133d86ed7fbStbbdev col = trace(&primary);
134d86ed7fbStbbdev
135d86ed7fbStbbdev serial = primary.serial;
136d86ed7fbStbbdev
137d86ed7fbStbbdev /* perform antialiasing if enabled.. */
138d86ed7fbStbbdev if (scene.antialiasing > 0) {
139d86ed7fbStbbdev for (alias = 0; alias < scene.antialiasing; alias++) {
140d86ed7fbStbbdev serial++; /* increment serial number */
141d86ed7fbStbbdev sample = primary; /* copy the regular primary ray to start with */
142d86ed7fbStbbdev sample.serial = serial;
143d86ed7fbStbbdev
144d86ed7fbStbbdev {
145d86ed7fbStbbdev oneapi::tbb::spin_mutex::scoped_lock lock(MyMutex);
146d86ed7fbStbbdev sample.d.x += ((rand() % 100) - 50) / jitterscale;
147d86ed7fbStbbdev sample.d.y += ((rand() % 100) - 50) / jitterscale;
148d86ed7fbStbbdev sample.d.z += ((rand() % 100) - 50) / jitterscale;
149d86ed7fbStbbdev }
150d86ed7fbStbbdev
151d86ed7fbStbbdev avcol = trace(&sample);
152d86ed7fbStbbdev
153d86ed7fbStbbdev serial = sample.serial; /* update our overall serial # */
154d86ed7fbStbbdev
155d86ed7fbStbbdev col.r += avcol.r;
156d86ed7fbStbbdev col.g += avcol.g;
157d86ed7fbStbbdev col.b += avcol.b;
158d86ed7fbStbbdev }
159d86ed7fbStbbdev
160d86ed7fbStbbdev col.r /= (scene.antialiasing + 1.0);
161d86ed7fbStbbdev col.g /= (scene.antialiasing + 1.0);
162d86ed7fbStbbdev col.b /= (scene.antialiasing + 1.0);
163d86ed7fbStbbdev }
164d86ed7fbStbbdev
165d86ed7fbStbbdev /* Handle overexposure and underexposure here... */
166d86ed7fbStbbdev R = (int)(col.r * 255);
167d86ed7fbStbbdev if (R > 255)
168d86ed7fbStbbdev R = 255;
169d86ed7fbStbbdev else if (R < 0)
170d86ed7fbStbbdev R = 0;
171d86ed7fbStbbdev
172d86ed7fbStbbdev G = (int)(col.g * 255);
173d86ed7fbStbbdev if (G > 255)
174d86ed7fbStbbdev G = 255;
175d86ed7fbStbbdev else if (G < 0)
176d86ed7fbStbbdev G = 0;
177d86ed7fbStbbdev
178d86ed7fbStbbdev B = (int)(col.b * 255);
179d86ed7fbStbbdev if (B > 255)
180d86ed7fbStbbdev B = 255;
181d86ed7fbStbbdev else if (B < 0)
182d86ed7fbStbbdev B = 0;
183d86ed7fbStbbdev
184d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
185d86ed7fbStbbdev R = int((1.0 - alpha) * R + alpha * blend[0]);
186d86ed7fbStbbdev G = int((1.0 - alpha) * G + alpha * blend[1]);
187d86ed7fbStbbdev B = int((1.0 - alpha) * B + alpha * blend[2]);
188d86ed7fbStbbdev #endif
189d86ed7fbStbbdev
190d86ed7fbStbbdev return video->get_color(R, G, B);
191d86ed7fbStbbdev }
192d86ed7fbStbbdev
193d86ed7fbStbbdev class parallel_task {
194d86ed7fbStbbdev public:
operator ()(const oneapi::tbb::blocked_range2d<int> & r) const195d86ed7fbStbbdev void operator()(const oneapi::tbb::blocked_range2d<int> &r) const {
196d86ed7fbStbbdev // task-local storage
197d86ed7fbStbbdev unsigned int serial = 1;
198d86ed7fbStbbdev unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
199d86ed7fbStbbdev unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
200d86ed7fbStbbdev memset(local_mbox, 0, mboxsize);
201d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
202d86ed7fbStbbdev // compute thread number while first task run
203d86ed7fbStbbdev thread_id_t::reference thread_id = thread_ids.local();
204d86ed7fbStbbdev if (thread_id == -1)
205d86ed7fbStbbdev thread_id = thread_number++;
206d86ed7fbStbbdev // choose thread color
207d86ed7fbStbbdev int pos = thread_id % NUM_COLORS;
208d86ed7fbStbbdev if (video->running) {
209d86ed7fbStbbdev drawing_area drawing(r.cols().begin(),
210d86ed7fbStbbdev totaly - r.rows().end(),
211d86ed7fbStbbdev r.cols().end() - r.cols().begin(),
212d86ed7fbStbbdev r.rows().end() - r.rows().begin());
213d86ed7fbStbbdev for (int i = 1, y = r.rows().begin(); y != r.rows().end(); ++y, i++) {
214d86ed7fbStbbdev drawing.set_pos(0, drawing.size_y - i);
215d86ed7fbStbbdev for (int x = r.cols().begin(); x != r.cols().end(); x++) {
216d86ed7fbStbbdev int d = (y % 3 == 0) ? 2 : 1;
217d86ed7fbStbbdev drawing.put_pixel(video->get_color(
218d86ed7fbStbbdev colors[pos][0] / d, colors[pos][1] / d, colors[pos][2] / d));
219d86ed7fbStbbdev }
220d86ed7fbStbbdev }
221d86ed7fbStbbdev }
222d86ed7fbStbbdev #endif
223d86ed7fbStbbdev if (video->next_frame()) {
224d86ed7fbStbbdev drawing_area drawing(r.cols().begin(),
225d86ed7fbStbbdev totaly - r.rows().end(),
226d86ed7fbStbbdev r.cols().end() - r.cols().begin(),
227d86ed7fbStbbdev r.rows().end() - r.rows().begin());
228d86ed7fbStbbdev for (int i = 1, y = r.rows().begin(); y != r.rows().end(); ++y, i++) {
229d86ed7fbStbbdev drawing.set_pos(0, drawing.size_y - i);
230d86ed7fbStbbdev for (int x = r.cols().begin(); x != r.cols().end(); x++) {
231d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
232d86ed7fbStbbdev float alpha = y == r.rows().begin() || y == r.rows().end() - 1 ||
233d86ed7fbStbbdev x == r.cols().begin() || x == r.cols().end() - 1
234d86ed7fbStbbdev ? border_alpha
235d86ed7fbStbbdev : inner_alpha;
236d86ed7fbStbbdev color_t c = render_one_pixel(
237d86ed7fbStbbdev x, y, local_mbox, serial, startx, stopx, starty, stopy, colors[pos], alpha);
238d86ed7fbStbbdev #else
239d86ed7fbStbbdev color_t c =
240d86ed7fbStbbdev render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
241d86ed7fbStbbdev #endif
242d86ed7fbStbbdev drawing.put_pixel(c);
243d86ed7fbStbbdev }
244d86ed7fbStbbdev }
245d86ed7fbStbbdev }
246d86ed7fbStbbdev }
247d86ed7fbStbbdev
parallel_task()248d86ed7fbStbbdev parallel_task() {}
249d86ed7fbStbbdev };
250d86ed7fbStbbdev
thread_trace(thr_parms * parms)251d86ed7fbStbbdev void *thread_trace(thr_parms *parms) {
252d86ed7fbStbbdev #if !WIN8UI_EXAMPLE
253d86ed7fbStbbdev int n, nthreads = utility::get_default_num_threads();
254d86ed7fbStbbdev char *nthreads_str = getenv("TBB_NUM_THREADS");
255d86ed7fbStbbdev if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
256d86ed7fbStbbdev nthreads = n;
257d86ed7fbStbbdev oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism, nthreads);
258d86ed7fbStbbdev #endif
259d86ed7fbStbbdev
260d86ed7fbStbbdev // shared but read-only so could be private too
261d86ed7fbStbbdev all_parms = parms;
262d86ed7fbStbbdev scene = parms->scene;
263d86ed7fbStbbdev startx = parms->startx;
264d86ed7fbStbbdev stopx = parms->stopx;
265d86ed7fbStbbdev starty = parms->starty;
266d86ed7fbStbbdev stopy = parms->stopy;
267d86ed7fbStbbdev jitterscale = 40.0 * (scene.hres + scene.vres);
268d86ed7fbStbbdev totaly = parms->scene.vres;
269d86ed7fbStbbdev #ifdef MARK_RENDERING_AREA
270d86ed7fbStbbdev thread_ids.clear();
271d86ed7fbStbbdev #endif
272d86ed7fbStbbdev
273d86ed7fbStbbdev int grain_size = 8;
274d86ed7fbStbbdev //WIN8UI does not support getenv() function so using auto_partitioner unconditionally
275d86ed7fbStbbdev #if !WIN8UI_EXAMPLE
276d86ed7fbStbbdev int g;
277d86ed7fbStbbdev char *grain_str = getenv("TBB_GRAINSIZE");
278d86ed7fbStbbdev if (grain_str && (sscanf(grain_str, "%d", &g) > 0) && (g > 0))
279d86ed7fbStbbdev grain_size = g;
280d86ed7fbStbbdev char *sched_str = getenv("TBB_PARTITIONER");
281d86ed7fbStbbdev static oneapi::tbb::affinity_partitioner g_ap; // reused across calls to thread_trace
282d86ed7fbStbbdev if (sched_str && !strncmp(sched_str, "aff", 3))
283d86ed7fbStbbdev oneapi::tbb::parallel_for(
284d86ed7fbStbbdev oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
285d86ed7fbStbbdev parallel_task(),
286d86ed7fbStbbdev g_ap);
287d86ed7fbStbbdev else if (sched_str && !strncmp(sched_str, "simp", 4))
288d86ed7fbStbbdev oneapi::tbb::parallel_for(
289d86ed7fbStbbdev oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
290d86ed7fbStbbdev parallel_task(),
291d86ed7fbStbbdev oneapi::tbb::simple_partitioner());
292d86ed7fbStbbdev else
293d86ed7fbStbbdev #endif
294d86ed7fbStbbdev oneapi::tbb::parallel_for(
295d86ed7fbStbbdev oneapi::tbb::blocked_range2d<int>(starty, stopy, grain_size, startx, stopx, grain_size),
296d86ed7fbStbbdev parallel_task(),
297d86ed7fbStbbdev oneapi::tbb::auto_partitioner());
298d86ed7fbStbbdev
299d86ed7fbStbbdev return (nullptr);
300d86ed7fbStbbdev }
301