1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev The original source for this example is
19d86ed7fbStbbdev Copyright (c) 1994-2008 John E. Stone
20d86ed7fbStbbdev All rights reserved.
21d86ed7fbStbbdev
22d86ed7fbStbbdev Redistribution and use in source and binary forms, with or without
23d86ed7fbStbbdev modification, are permitted provided that the following conditions
24d86ed7fbStbbdev are met:
25d86ed7fbStbbdev 1. Redistributions of source code must retain the above copyright
26d86ed7fbStbbdev notice, this list of conditions and the following disclaimer.
27d86ed7fbStbbdev 2. Redistributions in binary form must reproduce the above copyright
28d86ed7fbStbbdev notice, this list of conditions and the following disclaimer in the
29d86ed7fbStbbdev documentation and/or other materials provided with the distribution.
30d86ed7fbStbbdev 3. The name of the author may not be used to endorse or promote products
31d86ed7fbStbbdev derived from this software without specific prior written permission.
32d86ed7fbStbbdev
33d86ed7fbStbbdev THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34d86ed7fbStbbdev OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35d86ed7fbStbbdev WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36d86ed7fbStbbdev ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37d86ed7fbStbbdev DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38d86ed7fbStbbdev DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39d86ed7fbStbbdev OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40d86ed7fbStbbdev HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41d86ed7fbStbbdev LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42d86ed7fbStbbdev OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43d86ed7fbStbbdev SUCH DAMAGE.
44d86ed7fbStbbdev */
45d86ed7fbStbbdev
46d86ed7fbStbbdev #include "machine.hpp"
47d86ed7fbStbbdev #include "types.hpp"
48d86ed7fbStbbdev #include "macros.hpp"
49d86ed7fbStbbdev #include "vector.hpp"
50d86ed7fbStbbdev #include "tgafile.hpp"
51d86ed7fbStbbdev #include "trace.hpp"
52d86ed7fbStbbdev #include "light.hpp"
53d86ed7fbStbbdev #include "shade.hpp"
54d86ed7fbStbbdev #include "camera.hpp"
55d86ed7fbStbbdev #include "util.hpp"
56d86ed7fbStbbdev #include "intersect.hpp"
57d86ed7fbStbbdev #include "global.hpp"
58d86ed7fbStbbdev #include "ui.hpp"
59d86ed7fbStbbdev #include "tachyon_video.hpp"
60d86ed7fbStbbdev
61d86ed7fbStbbdev // shared but read-only so could be private too
62d86ed7fbStbbdev static thr_parms *all_parms;
63d86ed7fbStbbdev static scenedef scene;
64d86ed7fbStbbdev static int startx;
65d86ed7fbStbbdev static int stopx;
66d86ed7fbStbbdev static int starty;
67d86ed7fbStbbdev static int stopy;
68d86ed7fbStbbdev static flt jitterscale;
69d86ed7fbStbbdev static int totaly;
70d86ed7fbStbbdev static int nthreads;
71d86ed7fbStbbdev
72d86ed7fbStbbdev static int grain_size = 50;
73d86ed7fbStbbdev
74d86ed7fbStbbdev #ifdef _WIN32
75d86ed7fbStbbdev #include <windows.h>
76d86ed7fbStbbdev #include "pthread_w.hpp"
77d86ed7fbStbbdev #else
78d86ed7fbStbbdev #include <pthread.h>
79d86ed7fbStbbdev #endif
80d86ed7fbStbbdev
// Locks shared by the worker threads; all three are initialized in thread_trace().
static pthread_mutex_t MyMutex; // serializes the rand() calls in render_one_pixel()
static pthread_mutex_t MyMutex2; // initialized, but not otherwise used in this file
static pthread_mutex_t MyMutex3; // guards sched_nexty in schedule_thread_work()
82d86ed7fbStbbdev
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)83d86ed7fbStbbdev static color_t render_one_pixel(int x,
84d86ed7fbStbbdev int y,
85d86ed7fbStbbdev unsigned int *local_mbox,
86d86ed7fbStbbdev unsigned int &serial,
87d86ed7fbStbbdev int startx,
88d86ed7fbStbbdev int stopx,
89d86ed7fbStbbdev int starty,
90d86ed7fbStbbdev int stopy) {
91d86ed7fbStbbdev /* private vars moved inside loop */
92d86ed7fbStbbdev ray primary, sample;
93d86ed7fbStbbdev color col, avcol;
94d86ed7fbStbbdev int R, G, B;
95d86ed7fbStbbdev intersectstruct local_intersections;
96d86ed7fbStbbdev int alias;
97d86ed7fbStbbdev /* end private */
98d86ed7fbStbbdev
99d86ed7fbStbbdev primary = camray(&scene, x, y);
100d86ed7fbStbbdev primary.intstruct = &local_intersections;
101d86ed7fbStbbdev primary.flags = RT_RAY_REGULAR;
102d86ed7fbStbbdev
103d86ed7fbStbbdev serial++;
104d86ed7fbStbbdev primary.serial = serial;
105d86ed7fbStbbdev primary.mbox = local_mbox;
106d86ed7fbStbbdev primary.maxdist = FHUGE;
107d86ed7fbStbbdev primary.scene = &scene;
108d86ed7fbStbbdev col = trace(&primary);
109d86ed7fbStbbdev
110d86ed7fbStbbdev serial = primary.serial;
111d86ed7fbStbbdev
112d86ed7fbStbbdev /* perform antialiasing if enabled.. */
113d86ed7fbStbbdev if (scene.antialiasing > 0) {
114d86ed7fbStbbdev for (alias = 0; alias < scene.antialiasing; alias++) {
115d86ed7fbStbbdev serial++; /* increment serial number */
116d86ed7fbStbbdev sample = primary; /* copy the regular primary ray to start with */
117d86ed7fbStbbdev sample.serial = serial;
118d86ed7fbStbbdev
119d86ed7fbStbbdev {
120d86ed7fbStbbdev pthread_mutex_lock(&MyMutex);
121d86ed7fbStbbdev sample.d.x += ((rand() % 100) - 50) / jitterscale;
122d86ed7fbStbbdev sample.d.y += ((rand() % 100) - 50) / jitterscale;
123d86ed7fbStbbdev sample.d.z += ((rand() % 100) - 50) / jitterscale;
124d86ed7fbStbbdev pthread_mutex_unlock(&MyMutex);
125d86ed7fbStbbdev }
126d86ed7fbStbbdev
127d86ed7fbStbbdev avcol = trace(&sample);
128d86ed7fbStbbdev
129d86ed7fbStbbdev serial = sample.serial; /* update our overall serial # */
130d86ed7fbStbbdev
131d86ed7fbStbbdev col.r += avcol.r;
132d86ed7fbStbbdev col.g += avcol.g;
133d86ed7fbStbbdev col.b += avcol.b;
134d86ed7fbStbbdev }
135d86ed7fbStbbdev
136d86ed7fbStbbdev col.r /= (scene.antialiasing + 1.0);
137d86ed7fbStbbdev col.g /= (scene.antialiasing + 1.0);
138d86ed7fbStbbdev col.b /= (scene.antialiasing + 1.0);
139d86ed7fbStbbdev }
140d86ed7fbStbbdev
141d86ed7fbStbbdev /* Handle overexposure and underexposure here... */
142d86ed7fbStbbdev R = (int)(col.r * 255);
143d86ed7fbStbbdev if (R > 255)
144d86ed7fbStbbdev R = 255;
145d86ed7fbStbbdev else if (R < 0)
146d86ed7fbStbbdev R = 0;
147d86ed7fbStbbdev
148d86ed7fbStbbdev G = (int)(col.g * 255);
149d86ed7fbStbbdev if (G > 255)
150d86ed7fbStbbdev G = 255;
151d86ed7fbStbbdev else if (G < 0)
152d86ed7fbStbbdev G = 0;
153d86ed7fbStbbdev
154d86ed7fbStbbdev B = (int)(col.b * 255);
155d86ed7fbStbbdev if (B > 255)
156d86ed7fbStbbdev B = 255;
157d86ed7fbStbbdev else if (B < 0)
158d86ed7fbStbbdev B = 0;
159d86ed7fbStbbdev
160d86ed7fbStbbdev return video->get_color(R, G, B);
161d86ed7fbStbbdev }
162d86ed7fbStbbdev
163d86ed7fbStbbdev // need this so threads can self-schedule work; returns true (and bounds of work) if more work to do
164d86ed7fbStbbdev
165d86ed7fbStbbdev #define MIN(a, b) (((a) < (b)) ? (a) : (b))
166d86ed7fbStbbdev
167d86ed7fbStbbdev static int sched_nexty;
168d86ed7fbStbbdev
schedule_thread_work(int & y1,int & y2)169d86ed7fbStbbdev static bool schedule_thread_work(int &y1, int &y2) {
170d86ed7fbStbbdev pthread_mutex_lock(&MyMutex3);
171d86ed7fbStbbdev #ifdef STATIC_EVEN_SCHEDULING
172d86ed7fbStbbdev // optional static-even scheduling
173d86ed7fbStbbdev y1 = sched_nexty;
174d86ed7fbStbbdev sched_nexty += ((stopy - starty + 1) / nthreads);
175d86ed7fbStbbdev y2 = MIN(sched_nexty, stopy);
176d86ed7fbStbbdev #else
177d86ed7fbStbbdev // dynamic-chunk scheduling with specified grain_size
178d86ed7fbStbbdev y1 = sched_nexty;
179d86ed7fbStbbdev sched_nexty += grain_size;
180d86ed7fbStbbdev y2 = MIN(sched_nexty, stopy);
181d86ed7fbStbbdev #endif
182d86ed7fbStbbdev pthread_mutex_unlock(&MyMutex3);
183d86ed7fbStbbdev return (y1 <= stopy);
184d86ed7fbStbbdev }
185d86ed7fbStbbdev
parallel_thread(void * arg)186d86ed7fbStbbdev static void parallel_thread(void *arg) {
187d86ed7fbStbbdev // thread-local storage
188d86ed7fbStbbdev unsigned int serial = 1;
189d86ed7fbStbbdev unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
190d86ed7fbStbbdev unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
191d86ed7fbStbbdev memset(local_mbox, 0, mboxsize);
192d86ed7fbStbbdev
193d86ed7fbStbbdev // int thread_no = (int) arg;
194d86ed7fbStbbdev int y1, y2;
195d86ed7fbStbbdev while (schedule_thread_work(y1, y2)) {
196d86ed7fbStbbdev for (int y = y1; y < y2; y++) {
197d86ed7fbStbbdev {
198d86ed7fbStbbdev drawing_area drawing(startx, totaly - y, stopx - startx, 1);
199d86ed7fbStbbdev for (int x = startx; x < stopx; x++) {
200d86ed7fbStbbdev color_t c =
201d86ed7fbStbbdev render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
202d86ed7fbStbbdev drawing.put_pixel(c);
203d86ed7fbStbbdev }
204d86ed7fbStbbdev }
205d86ed7fbStbbdev if (!video->next_frame())
206d86ed7fbStbbdev pthread_exit(arg);
207d86ed7fbStbbdev }
208d86ed7fbStbbdev }
209d86ed7fbStbbdev pthread_exit(arg);
210d86ed7fbStbbdev }
211d86ed7fbStbbdev
212d86ed7fbStbbdev // need this (for each platform) so we can create the right number of threads, to work efficiently
213d86ed7fbStbbdev
#if defined(_WIN32)

// Windows: processor count reported by GetNativeSystemInfo().
static int get_num_cpus(void) {
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    return (int)si.dwNumberOfProcessors;
}

#elif defined(__APPLE__)

#include <sys/types.h>
#include <sys/sysctl.h>

// macOS: processor count from the hw.ncpu sysctl; returns 1 if the query fails
// or reports a non-positive count.
static int get_num_cpus(void) {
    int name[2] = { CTL_HW, HW_NCPU };
    int ncpu = 1;
    std::size_t size = sizeof(ncpu);
    if (sysctl(name, 2, &ncpu, &size, nullptr, 0) != 0 || ncpu < 1)
        return 1;
    return ncpu;
}

#else /* Linux */

#include <sys/sysinfo.h>

// Linux: processor count reported by get_nprocs().
static int get_num_cpus(void) {
    return get_nprocs();
}

#endif
242d86ed7fbStbbdev
thread_trace(thr_parms * parms)243d86ed7fbStbbdev void *thread_trace(thr_parms *parms) {
244d86ed7fbStbbdev // shared but read-only so could be private too
245d86ed7fbStbbdev all_parms = parms;
246d86ed7fbStbbdev scene = parms->scene;
247d86ed7fbStbbdev startx = parms->startx;
248d86ed7fbStbbdev stopx = parms->stopx;
249d86ed7fbStbbdev starty = parms->starty;
250d86ed7fbStbbdev stopy = parms->stopy;
251d86ed7fbStbbdev jitterscale = 40.0 * (scene.hres + scene.vres);
252d86ed7fbStbbdev totaly = parms->scene.vres - 1;
253d86ed7fbStbbdev
254d86ed7fbStbbdev int n;
255d86ed7fbStbbdev nthreads = get_num_cpus();
256d86ed7fbStbbdev char *nthreads_str = getenv("THR_NUM_THREADS");
257d86ed7fbStbbdev if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
258d86ed7fbStbbdev nthreads = n;
259d86ed7fbStbbdev char *grain_str = getenv("THR_GRAINSIZE");
260d86ed7fbStbbdev if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
261d86ed7fbStbbdev grain_size = n;
262d86ed7fbStbbdev pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
263d86ed7fbStbbdev pthread_mutex_init(&MyMutex, nullptr);
264d86ed7fbStbbdev pthread_mutex_init(&MyMutex2, nullptr);
265d86ed7fbStbbdev pthread_mutex_init(&MyMutex3, nullptr);
266d86ed7fbStbbdev sched_nexty = starty; // initialize schedule_thread_work() self-scheduler
267d86ed7fbStbbdev for (int i = 0; i < nthreads; i++) {
268d86ed7fbStbbdev pthread_create(
269d86ed7fbStbbdev &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
270d86ed7fbStbbdev }
271d86ed7fbStbbdev for (int i = 0; i < nthreads; i++) {
272d86ed7fbStbbdev void *exit_val;
273d86ed7fbStbbdev pthread_join(threads[i], &exit_val);
274d86ed7fbStbbdev // expect i = (int) exit_val
275d86ed7fbStbbdev }
276d86ed7fbStbbdev
277d86ed7fbStbbdev return (nullptr);
278d86ed7fbStbbdev }
279