1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 /*
18 The original source for this example is
19 Copyright (c) 1994-2008 John E. Stone
20 All rights reserved.
21
22 Redistribution and use in source and binary forms, with or without
23 modification, are permitted provided that the following conditions
24 are met:
25 1. Redistributions of source code must retain the above copyright
26 notice, this list of conditions and the following disclaimer.
27 2. Redistributions in binary form must reproduce the above copyright
28 notice, this list of conditions and the following disclaimer in the
29 documentation and/or other materials provided with the distribution.
30 3. The name of the author may not be used to endorse or promote products
31 derived from this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 SUCH DAMAGE.
44 */
45
46 #include "machine.hpp"
47 #include "types.hpp"
48 #include "macros.hpp"
49 #include "vector.hpp"
50 #include "tgafile.hpp"
51 #include "trace.hpp"
52 #include "light.hpp"
53 #include "shade.hpp"
54 #include "camera.hpp"
55 #include "util.hpp"
56 #include "intersect.hpp"
57 #include "global.hpp"
58 #include "ui.hpp"
59 #include "tachyon_video.hpp"
60
61 // shared but read-only so could be private too
62 static thr_parms *all_parms;
63 static scenedef scene;
64 static int startx;
65 static int stopx;
66 static int starty;
67 static int stopy;
68 static flt jitterscale;
69 static int totaly;
70 static int nthreads;
71
72 static int grain_size = 50;
73
74 #ifdef _WIN32
75 #include <windows.h>
76 #include "pthread_w.hpp"
77 #else
78 #include <pthread.h>
79 #endif
80
// Serializes the rand() calls made while jittering antialiasing rays.
static pthread_mutex_t MyMutex;
// Initialized by thread_trace(); no other use is visible in this file.
static pthread_mutex_t MyMutex2;
// Guards the sched_nexty cursor inside schedule_thread_work().
static pthread_mutex_t MyMutex3;
82
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)83 static color_t render_one_pixel(int x,
84 int y,
85 unsigned int *local_mbox,
86 unsigned int &serial,
87 int startx,
88 int stopx,
89 int starty,
90 int stopy) {
91 /* private vars moved inside loop */
92 ray primary, sample;
93 color col, avcol;
94 int R, G, B;
95 intersectstruct local_intersections;
96 int alias;
97 /* end private */
98
99 primary = camray(&scene, x, y);
100 primary.intstruct = &local_intersections;
101 primary.flags = RT_RAY_REGULAR;
102
103 serial++;
104 primary.serial = serial;
105 primary.mbox = local_mbox;
106 primary.maxdist = FHUGE;
107 primary.scene = &scene;
108 col = trace(&primary);
109
110 serial = primary.serial;
111
112 /* perform antialiasing if enabled.. */
113 if (scene.antialiasing > 0) {
114 for (alias = 0; alias < scene.antialiasing; alias++) {
115 serial++; /* increment serial number */
116 sample = primary; /* copy the regular primary ray to start with */
117 sample.serial = serial;
118
119 {
120 pthread_mutex_lock(&MyMutex);
121 sample.d.x += ((rand() % 100) - 50) / jitterscale;
122 sample.d.y += ((rand() % 100) - 50) / jitterscale;
123 sample.d.z += ((rand() % 100) - 50) / jitterscale;
124 pthread_mutex_unlock(&MyMutex);
125 }
126
127 avcol = trace(&sample);
128
129 serial = sample.serial; /* update our overall serial # */
130
131 col.r += avcol.r;
132 col.g += avcol.g;
133 col.b += avcol.b;
134 }
135
136 col.r /= (scene.antialiasing + 1.0);
137 col.g /= (scene.antialiasing + 1.0);
138 col.b /= (scene.antialiasing + 1.0);
139 }
140
141 /* Handle overexposure and underexposure here... */
142 R = (int)(col.r * 255);
143 if (R > 255)
144 R = 255;
145 else if (R < 0)
146 R = 0;
147
148 G = (int)(col.g * 255);
149 if (G > 255)
150 G = 255;
151 else if (G < 0)
152 G = 0;
153
154 B = (int)(col.b * 255);
155 if (B > 255)
156 B = 255;
157 else if (B < 0)
158 B = 0;
159
160 return video->get_color(R, G, B);
161 }
162
163 // need this so threads can self-schedule work; returns true (and bounds of work) if more work to do
164
165 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
166
167 static int sched_nexty;
168
schedule_thread_work(int & y1,int & y2)169 static bool schedule_thread_work(int &y1, int &y2) {
170 pthread_mutex_lock(&MyMutex3);
171 #ifdef STATIC_EVEN_SCHEDULING
172 // optional static-even scheduling
173 y1 = sched_nexty;
174 sched_nexty += ((stopy - starty + 1) / nthreads);
175 y2 = MIN(sched_nexty, stopy);
176 #else
177 // dynamic-chunk scheduling with specified grain_size
178 y1 = sched_nexty;
179 sched_nexty += grain_size;
180 y2 = MIN(sched_nexty, stopy);
181 #endif
182 pthread_mutex_unlock(&MyMutex3);
183 return (y1 <= stopy);
184 }
185
parallel_thread(void * arg)186 static void parallel_thread(void *arg) {
187 // thread-local storage
188 unsigned int serial = 1;
189 unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
190 unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
191 memset(local_mbox, 0, mboxsize);
192
193 // int thread_no = (int) arg;
194 int y1, y2;
195 while (schedule_thread_work(y1, y2)) {
196 for (int y = y1; y < y2; y++) {
197 {
198 drawing_area drawing(startx, totaly - y, stopx - startx, 1);
199 for (int x = startx; x < stopx; x++) {
200 color_t c =
201 render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
202 drawing.put_pixel(c);
203 }
204 }
205 if (!video->next_frame())
206 pthread_exit(arg);
207 }
208 }
209 pthread_exit(arg);
210 }
211
212 // need this (for each platform) so we can create the right number of threads, to work efficiently
213
214 #if defined(_WIN32)
215
get_num_cpus(void)216 static int get_num_cpus(void) {
217 SYSTEM_INFO si;
218 GetNativeSystemInfo(&si);
219 return (int)si.dwNumberOfProcessors;
220 }
221
222 #elif defined(__APPLE__)
223
224 #include "sys/types.hpp"
225 #include "sys/sysctl.hpp"
get_num_cpus(void)226 static int get_num_cpus(void) {
227 int name[2] = { CTL_HW, HW_NCPU };
228 int ncpu;
229 std::size_t size = sizeof(ncpu);
230 sysctl(name, 2, &ncpu, &size, nullptr, 0);
231 return ncpu;
232 }
233
234 #else /* Linux */
235
236 #include <sys/sysinfo.h>
// Linux: report the processor count returned by get_nprocs().
static int get_num_cpus(void) {
    const int processor_count = get_nprocs();
    return processor_count;
}
240
241 #endif
242
thread_trace(thr_parms * parms)243 void *thread_trace(thr_parms *parms) {
244 // shared but read-only so could be private too
245 all_parms = parms;
246 scene = parms->scene;
247 startx = parms->startx;
248 stopx = parms->stopx;
249 starty = parms->starty;
250 stopy = parms->stopy;
251 jitterscale = 40.0 * (scene.hres + scene.vres);
252 totaly = parms->scene.vres - 1;
253
254 int n;
255 nthreads = get_num_cpus();
256 char *nthreads_str = getenv("THR_NUM_THREADS");
257 if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
258 nthreads = n;
259 char *grain_str = getenv("THR_GRAINSIZE");
260 if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
261 grain_size = n;
262 pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
263 pthread_mutex_init(&MyMutex, nullptr);
264 pthread_mutex_init(&MyMutex2, nullptr);
265 pthread_mutex_init(&MyMutex3, nullptr);
266 sched_nexty = starty; // initialize schedule_thread_work() self-scheduler
267 for (int i = 0; i < nthreads; i++) {
268 pthread_create(
269 &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
270 }
271 for (int i = 0; i < nthreads; i++) {
272 void *exit_val;
273 pthread_join(threads[i], &exit_val);
274 // expect i = (int) exit_val
275 }
276
277 return (nullptr);
278 }
279