1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev The original source for this example is
19d86ed7fbStbbdev Copyright (c) 1994-2008 John E. Stone
20d86ed7fbStbbdev All rights reserved.
21d86ed7fbStbbdev
22d86ed7fbStbbdev Redistribution and use in source and binary forms, with or without
23d86ed7fbStbbdev modification, are permitted provided that the following conditions
24d86ed7fbStbbdev are met:
25d86ed7fbStbbdev 1. Redistributions of source code must retain the above copyright
26d86ed7fbStbbdev notice, this list of conditions and the following disclaimer.
27d86ed7fbStbbdev 2. Redistributions in binary form must reproduce the above copyright
28d86ed7fbStbbdev notice, this list of conditions and the following disclaimer in the
29d86ed7fbStbbdev documentation and/or other materials provided with the distribution.
30d86ed7fbStbbdev 3. The name of the author may not be used to endorse or promote products
31d86ed7fbStbbdev derived from this software without specific prior written permission.
32d86ed7fbStbbdev
33d86ed7fbStbbdev THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34d86ed7fbStbbdev OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35d86ed7fbStbbdev WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36d86ed7fbStbbdev ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37d86ed7fbStbbdev DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38d86ed7fbStbbdev DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39d86ed7fbStbbdev OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40d86ed7fbStbbdev HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41d86ed7fbStbbdev LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42d86ed7fbStbbdev OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43d86ed7fbStbbdev SUCH DAMAGE.
44d86ed7fbStbbdev */
45d86ed7fbStbbdev
46d86ed7fbStbbdev #include "machine.hpp"
47d86ed7fbStbbdev #include "types.hpp"
48d86ed7fbStbbdev #include "macros.hpp"
49d86ed7fbStbbdev #include "vector.hpp"
50d86ed7fbStbbdev #include "tgafile.hpp"
51d86ed7fbStbbdev #include "trace.hpp"
52d86ed7fbStbbdev #include "light.hpp"
53d86ed7fbStbbdev #include "shade.hpp"
54d86ed7fbStbbdev #include "camera.hpp"
55d86ed7fbStbbdev #include "util.hpp"
56d86ed7fbStbbdev #include "intersect.hpp"
57d86ed7fbStbbdev #include "global.hpp"
58d86ed7fbStbbdev #include "ui.hpp"
59d86ed7fbStbbdev #include "tachyon_video.hpp"
60d86ed7fbStbbdev
// Render-wide state. thread_trace() fills these in before it creates any worker
// thread; the workers only read them (shared but read-only, so each could be
// private too).
static thr_parms *all_parms; // parameter block most recently passed to thread_trace()
static scenedef scene; // copy of parms->scene taken by thread_trace()
static int startx; // render bounds copied from the thr_parms given to thread_trace()
static int stopx;
static int starty;
static int stopy;
static flt jitterscale; // divisor applied to the random antialiasing perturbation
static int totaly; // scene.vres; used to compute the y origin of each drawing_area
static int nthreads; // number of worker threads thread_trace() creates

// Patches whose width or height is at least grain_size are subdivided further by
// generate_work(). thread_trace() overrides the default from THR_GRAINSIZE.
static int grain_size = 50;
// Divisor generate_work() uses to size the sub-patches along each axis.
const int DIVFACTOR = 2;

// Smaller of two values. Function-like macro: an argument may be evaluated twice.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
76d86ed7fbStbbdev
77d86ed7fbStbbdev #ifdef _WIN32
78d86ed7fbStbbdev #include <windows.h>
79d86ed7fbStbbdev #include "pthread_w.hpp"
80d86ed7fbStbbdev #else
81d86ed7fbStbbdev #include <pthread.h>
82d86ed7fbStbbdev #endif
83d86ed7fbStbbdev
// MyMutex serializes the rand() calls in render_one_pixel(); MyMutex3 protects the
// work queue in schedule_thread_work(). MyMutex2 is initialized by thread_trace()
// but is not locked anywhere in this file.
static pthread_mutex_t MyMutex, MyMutex2, MyMutex3;
85d86ed7fbStbbdev
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)86d86ed7fbStbbdev static color_t render_one_pixel(int x,
87d86ed7fbStbbdev int y,
88d86ed7fbStbbdev unsigned int *local_mbox,
89d86ed7fbStbbdev unsigned int &serial,
90d86ed7fbStbbdev int startx,
91d86ed7fbStbbdev int stopx,
92d86ed7fbStbbdev int starty,
93d86ed7fbStbbdev int stopy) {
94d86ed7fbStbbdev /* private vars moved inside loop */
95d86ed7fbStbbdev ray primary, sample;
96d86ed7fbStbbdev color col, avcol;
97d86ed7fbStbbdev int R, G, B;
98d86ed7fbStbbdev intersectstruct local_intersections;
99d86ed7fbStbbdev int alias;
100d86ed7fbStbbdev /* end private */
101d86ed7fbStbbdev
102d86ed7fbStbbdev primary = camray(&scene, x, y);
103d86ed7fbStbbdev primary.intstruct = &local_intersections;
104d86ed7fbStbbdev primary.flags = RT_RAY_REGULAR;
105d86ed7fbStbbdev
106d86ed7fbStbbdev serial++;
107d86ed7fbStbbdev primary.serial = serial;
108d86ed7fbStbbdev primary.mbox = local_mbox;
109d86ed7fbStbbdev primary.maxdist = FHUGE;
110d86ed7fbStbbdev primary.scene = &scene;
111d86ed7fbStbbdev col = trace(&primary);
112d86ed7fbStbbdev
113d86ed7fbStbbdev serial = primary.serial;
114d86ed7fbStbbdev
115d86ed7fbStbbdev /* perform antialiasing if enabled.. */
116d86ed7fbStbbdev if (scene.antialiasing > 0) {
117d86ed7fbStbbdev for (alias = 0; alias < scene.antialiasing; alias++) {
118d86ed7fbStbbdev serial++; /* increment serial number */
119d86ed7fbStbbdev sample = primary; /* copy the regular primary ray to start with */
120d86ed7fbStbbdev sample.serial = serial;
121d86ed7fbStbbdev
122d86ed7fbStbbdev {
123d86ed7fbStbbdev pthread_mutex_lock(&MyMutex);
124d86ed7fbStbbdev sample.d.x += ((rand() % 100) - 50) / jitterscale;
125d86ed7fbStbbdev sample.d.y += ((rand() % 100) - 50) / jitterscale;
126d86ed7fbStbbdev sample.d.z += ((rand() % 100) - 50) / jitterscale;
127d86ed7fbStbbdev pthread_mutex_unlock(&MyMutex);
128d86ed7fbStbbdev }
129d86ed7fbStbbdev
130d86ed7fbStbbdev avcol = trace(&sample);
131d86ed7fbStbbdev
132d86ed7fbStbbdev serial = sample.serial; /* update our overall serial # */
133d86ed7fbStbbdev
134d86ed7fbStbbdev col.r += avcol.r;
135d86ed7fbStbbdev col.g += avcol.g;
136d86ed7fbStbbdev col.b += avcol.b;
137d86ed7fbStbbdev }
138d86ed7fbStbbdev
139d86ed7fbStbbdev col.r /= (scene.antialiasing + 1.0);
140d86ed7fbStbbdev col.g /= (scene.antialiasing + 1.0);
141d86ed7fbStbbdev col.b /= (scene.antialiasing + 1.0);
142d86ed7fbStbbdev }
143d86ed7fbStbbdev
144d86ed7fbStbbdev /* Handle overexposure and underexposure here... */
145d86ed7fbStbbdev R = (int)(col.r * 255);
146d86ed7fbStbbdev if (R > 255)
147d86ed7fbStbbdev R = 255;
148d86ed7fbStbbdev else if (R < 0)
149d86ed7fbStbbdev R = 0;
150d86ed7fbStbbdev
151d86ed7fbStbbdev G = (int)(col.g * 255);
152d86ed7fbStbbdev if (G > 255)
153d86ed7fbStbbdev G = 255;
154d86ed7fbStbbdev else if (G < 0)
155d86ed7fbStbbdev G = 0;
156d86ed7fbStbbdev
157d86ed7fbStbbdev B = (int)(col.b * 255);
158d86ed7fbStbbdev if (B > 255)
159d86ed7fbStbbdev B = 255;
160d86ed7fbStbbdev else if (B < 0)
161d86ed7fbStbbdev B = 0;
162d86ed7fbStbbdev
163d86ed7fbStbbdev return video->get_color(R, G, B);
164d86ed7fbStbbdev }
165d86ed7fbStbbdev
// Work queue that lets the threads self-schedule: generate_work() appends patches at
// the tail and schedule_thread_work() hands them out from the head.

// Singly linked queue node holding one patch to render.
typedef struct work_queue_entry_s {
    patch pch; // bounds of the patch
    struct work_queue_entry_s *next; // following entry, or nullptr for the last one
} work_queue_entry_t;
static work_queue_entry_t *work_queue_head = nullptr; // next entry to hand out; nullptr when empty
static work_queue_entry_t *work_queue_tail = nullptr; // most recently appended entry
174d86ed7fbStbbdev
generate_work(patch * pchin)175d86ed7fbStbbdev static void generate_work(patch *pchin) {
176d86ed7fbStbbdev int startx, stopx, starty, stopy;
177d86ed7fbStbbdev int xs, ys;
178d86ed7fbStbbdev
179d86ed7fbStbbdev startx = pchin->startx;
180d86ed7fbStbbdev stopx = pchin->stopx;
181d86ed7fbStbbdev starty = pchin->starty;
182d86ed7fbStbbdev stopy = pchin->stopy;
183d86ed7fbStbbdev
184d86ed7fbStbbdev if (((stopx - startx) >= grain_size) || ((stopy - starty) >= grain_size)) {
185d86ed7fbStbbdev int xpatchsize = (stopx - startx) / DIVFACTOR + 1;
186d86ed7fbStbbdev int ypatchsize = (stopy - starty) / DIVFACTOR + 1;
187d86ed7fbStbbdev for (ys = starty; ys <= stopy; ys += ypatchsize)
188d86ed7fbStbbdev for (xs = startx; xs <= stopx; xs += xpatchsize) {
189d86ed7fbStbbdev patch pch;
190d86ed7fbStbbdev pch.startx = xs;
191d86ed7fbStbbdev pch.starty = ys;
192d86ed7fbStbbdev pch.stopx = MIN(xs + xpatchsize, stopx);
193d86ed7fbStbbdev pch.stopy = MIN(ys + ypatchsize, stopy);
194d86ed7fbStbbdev
195d86ed7fbStbbdev generate_work(&pch);
196d86ed7fbStbbdev }
197d86ed7fbStbbdev }
198d86ed7fbStbbdev else {
199d86ed7fbStbbdev /* just trace this patch */
200d86ed7fbStbbdev work_queue_entry_t *q = (work_queue_entry_t *)malloc(sizeof(work_queue_entry_t));
201d86ed7fbStbbdev q->pch.starty = starty;
202d86ed7fbStbbdev q->pch.stopy = stopy;
203d86ed7fbStbbdev q->pch.startx = startx;
204d86ed7fbStbbdev q->pch.stopx = stopx;
205d86ed7fbStbbdev q->next = nullptr;
206d86ed7fbStbbdev if (work_queue_head == nullptr) {
207d86ed7fbStbbdev work_queue_head = q;
208d86ed7fbStbbdev }
209d86ed7fbStbbdev else {
210d86ed7fbStbbdev work_queue_tail->next = q;
211d86ed7fbStbbdev }
212d86ed7fbStbbdev work_queue_tail = q;
213d86ed7fbStbbdev }
214d86ed7fbStbbdev }
215d86ed7fbStbbdev
generate_worklist(void)216d86ed7fbStbbdev static void generate_worklist(void) {
217d86ed7fbStbbdev patch pch;
218d86ed7fbStbbdev pch.startx = startx;
219d86ed7fbStbbdev pch.stopx = stopx;
220d86ed7fbStbbdev pch.starty = starty;
221d86ed7fbStbbdev pch.stopy = stopy;
222d86ed7fbStbbdev generate_work(&pch);
223d86ed7fbStbbdev }
224d86ed7fbStbbdev
schedule_thread_work(patch & pch)225d86ed7fbStbbdev static bool schedule_thread_work(patch &pch) {
226d86ed7fbStbbdev pthread_mutex_lock(&MyMutex3);
227d86ed7fbStbbdev work_queue_entry_t *q = work_queue_head;
228d86ed7fbStbbdev if (q != nullptr) {
229d86ed7fbStbbdev pch = q->pch;
230d86ed7fbStbbdev work_queue_head = work_queue_head->next;
231d86ed7fbStbbdev }
232d86ed7fbStbbdev pthread_mutex_unlock(&MyMutex3);
233d86ed7fbStbbdev return (q != nullptr);
234d86ed7fbStbbdev }
235d86ed7fbStbbdev
parallel_thread(void * arg)236d86ed7fbStbbdev static void parallel_thread(void *arg) {
237d86ed7fbStbbdev // thread-local storage
238d86ed7fbStbbdev unsigned int serial = 1;
239d86ed7fbStbbdev unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
240d86ed7fbStbbdev unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
241d86ed7fbStbbdev memset(local_mbox, 0, mboxsize);
242d86ed7fbStbbdev
243d86ed7fbStbbdev // int thread_no = (int) arg;
244d86ed7fbStbbdev patch pch;
245d86ed7fbStbbdev while (schedule_thread_work(pch)) {
246d86ed7fbStbbdev {
247d86ed7fbStbbdev drawing_area drawing(
248d86ed7fbStbbdev pch.startx, totaly - pch.stopy, pch.stopx - pch.startx, pch.stopy - pch.starty);
249d86ed7fbStbbdev for (int i = 1, y = pch.starty; y < pch.stopy; ++y, i++) {
250d86ed7fbStbbdev drawing.set_pos(0, drawing.size_y - i);
251d86ed7fbStbbdev for (int x = pch.startx; x < pch.stopx; x++) {
252d86ed7fbStbbdev color_t c =
253d86ed7fbStbbdev render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
254d86ed7fbStbbdev drawing.put_pixel(c);
255d86ed7fbStbbdev }
256d86ed7fbStbbdev }
257d86ed7fbStbbdev }
258d86ed7fbStbbdev if (!video->next_frame())
259d86ed7fbStbbdev pthread_exit(arg);
260d86ed7fbStbbdev }
261d86ed7fbStbbdev pthread_exit(arg);
262d86ed7fbStbbdev }
263d86ed7fbStbbdev
264d86ed7fbStbbdev // need this (for each platform) so we can create the right number of threads, to work efficiently
265d86ed7fbStbbdev
#if defined(_WIN32)

/* Number of processors reported by GetNativeSystemInfo(). */
static int get_num_cpus(void) {
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    return (int)si.dwNumberOfProcessors;
}

#elif defined(__APPLE__)

/* System headers: these are <sys/...h> files, not project "*.hpp" headers. */
#include <sys/types.h>
#include <sys/sysctl.h>

/*
 * Number of CPUs reported by the { CTL_HW, HW_NCPU } sysctl. Falls back to 1 when
 * the query fails or reports a non-positive count, so the caller never sees an
 * uninitialized or unusable value.
 */
static int get_num_cpus(void) {
    int name[2] = { CTL_HW, HW_NCPU };
    int ncpu = 0;
    size_t size = sizeof(ncpu);
    if (sysctl(name, 2, &ncpu, &size, nullptr, 0) != 0 || ncpu < 1) {
        return 1;
    }
    return ncpu;
}

#else /* Linux */

#include <sys/sysinfo.h>

/* Number of processors reported by get_nprocs(). */
static int get_num_cpus(void) {
    return get_nprocs();
}

#endif
294d86ed7fbStbbdev
thread_trace(thr_parms * parms)295d86ed7fbStbbdev void *thread_trace(thr_parms *parms) {
296d86ed7fbStbbdev // shared but read-only so could be private too
297d86ed7fbStbbdev all_parms = parms;
298d86ed7fbStbbdev scene = parms->scene;
299d86ed7fbStbbdev startx = parms->startx;
300d86ed7fbStbbdev stopx = parms->stopx;
301d86ed7fbStbbdev starty = parms->starty;
302d86ed7fbStbbdev stopy = parms->stopy;
303d86ed7fbStbbdev jitterscale = 40.0 * (scene.hres + scene.vres);
304d86ed7fbStbbdev totaly = parms->scene.vres;
305d86ed7fbStbbdev
306d86ed7fbStbbdev int n;
307d86ed7fbStbbdev nthreads = get_num_cpus();
308d86ed7fbStbbdev char *nthreads_str = getenv("THR_NUM_THREADS");
309d86ed7fbStbbdev if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
310d86ed7fbStbbdev nthreads = n;
311d86ed7fbStbbdev char *grain_str = getenv("THR_GRAINSIZE");
312d86ed7fbStbbdev if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
313d86ed7fbStbbdev grain_size = n;
314d86ed7fbStbbdev pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
315d86ed7fbStbbdev pthread_mutex_init(&MyMutex, nullptr);
316d86ed7fbStbbdev pthread_mutex_init(&MyMutex2, nullptr);
317d86ed7fbStbbdev pthread_mutex_init(&MyMutex3, nullptr);
318d86ed7fbStbbdev generate_worklist(); // initialize schedule_thread_work() self-scheduler
319d86ed7fbStbbdev for (int i = 0; i < nthreads; i++) {
320d86ed7fbStbbdev pthread_create(
321d86ed7fbStbbdev &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
322d86ed7fbStbbdev }
323d86ed7fbStbbdev for (int i = 0; i < nthreads; i++) {
324d86ed7fbStbbdev void *exit_val;
325d86ed7fbStbbdev pthread_join(threads[i], &exit_val);
326d86ed7fbStbbdev // expect i = (int) exit_val
327d86ed7fbStbbdev }
328d86ed7fbStbbdev
329d86ed7fbStbbdev return (nullptr);
330d86ed7fbStbbdev }
331