1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 /*
18     The original source for this example is
19     Copyright (c) 1994-2008 John E. Stone
20     All rights reserved.
21 
22     Redistribution and use in source and binary forms, with or without
23     modification, are permitted provided that the following conditions
24     are met:
25     1. Redistributions of source code must retain the above copyright
26        notice, this list of conditions and the following disclaimer.
27     2. Redistributions in binary form must reproduce the above copyright
28        notice, this list of conditions and the following disclaimer in the
29        documentation and/or other materials provided with the distribution.
30     3. The name of the author may not be used to endorse or promote products
31        derived from this software without specific prior written permission.
32 
33     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34     OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36     ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43     SUCH DAMAGE.
44 */
45 
46 #include "machine.hpp"
47 #include "types.hpp"
48 #include "macros.hpp"
49 #include "vector.hpp"
50 #include "tgafile.hpp"
51 #include "trace.hpp"
52 #include "light.hpp"
53 #include "shade.hpp"
54 #include "camera.hpp"
55 #include "util.hpp"
56 #include "intersect.hpp"
57 #include "global.hpp"
58 #include "ui.hpp"
59 #include "tachyon_video.hpp"
60 
// File-scope render state. thread_trace() fills these in before it starts the worker
// threads; the code in this file only reads them afterwards
// (shared but read-only so could be private too).
static thr_parms *all_parms; // parameter block most recently passed to thread_trace()
static scenedef scene; // copy of parms->scene; every ray is traced against it
static int startx; // first pixel column rendered on each scanline
static int stopx; // column bound; the pixel loop runs while x < stopx
static int starty; // first scanline handed out by the self-scheduler
static int stopy; // scanline bound; the row loop runs while y < the chunk end, capped at stopy
static flt jitterscale; // divisor for the random antialiasing jitter: 40.0 * (hres + vres)
static int totaly; // scene.vres - 1; used as (totaly - y) when positioning the drawing area
static int nthreads; // number of worker threads: CPU count, or THR_NUM_THREADS if set and > 0

static int grain_size = 50; // scanlines per chunk for dynamic scheduling; THR_GRAINSIZE overrides
73 
74 #ifdef _WIN32
75 #include <windows.h>
76 #include "pthread_w.hpp"
77 #else
78 #include <pthread.h>
79 #endif
80 
// Locks shared by the worker threads (all three are initialized in thread_trace()):
//   MyMutex  - serializes the rand() calls that jitter antialiasing samples
//   MyMutex2 - initialized but not locked anywhere in the code visible in this file
//   MyMutex3 - guards sched_nexty inside schedule_thread_work()
static pthread_mutex_t MyMutex, MyMutex2, MyMutex3;
82 
83 static color_t render_one_pixel(int x,
84                                 int y,
85                                 unsigned int *local_mbox,
86                                 unsigned int &serial,
87                                 int startx,
88                                 int stopx,
89                                 int starty,
90                                 int stopy) {
91     /* private vars moved inside loop */
92     ray primary, sample;
93     color col, avcol;
94     int R, G, B;
95     intersectstruct local_intersections;
96     int alias;
97     /* end private */
98 
99     primary = camray(&scene, x, y);
100     primary.intstruct = &local_intersections;
101     primary.flags = RT_RAY_REGULAR;
102 
103     serial++;
104     primary.serial = serial;
105     primary.mbox = local_mbox;
106     primary.maxdist = FHUGE;
107     primary.scene = &scene;
108     col = trace(&primary);
109 
110     serial = primary.serial;
111 
112     /* perform antialiasing if enabled.. */
113     if (scene.antialiasing > 0) {
114         for (alias = 0; alias < scene.antialiasing; alias++) {
115             serial++; /* increment serial number */
116             sample = primary; /* copy the regular primary ray to start with */
117             sample.serial = serial;
118 
119             {
120                 pthread_mutex_lock(&MyMutex);
121                 sample.d.x += ((rand() % 100) - 50) / jitterscale;
122                 sample.d.y += ((rand() % 100) - 50) / jitterscale;
123                 sample.d.z += ((rand() % 100) - 50) / jitterscale;
124                 pthread_mutex_unlock(&MyMutex);
125             }
126 
127             avcol = trace(&sample);
128 
129             serial = sample.serial; /* update our overall serial # */
130 
131             col.r += avcol.r;
132             col.g += avcol.g;
133             col.b += avcol.b;
134         }
135 
136         col.r /= (scene.antialiasing + 1.0);
137         col.g /= (scene.antialiasing + 1.0);
138         col.b /= (scene.antialiasing + 1.0);
139     }
140 
141     /* Handle overexposure and underexposure here... */
142     R = (int)(col.r * 255);
143     if (R > 255)
144         R = 255;
145     else if (R < 0)
146         R = 0;
147 
148     G = (int)(col.g * 255);
149     if (G > 255)
150         G = 255;
151     else if (G < 0)
152         G = 0;
153 
154     B = (int)(col.b * 255);
155     if (B > 255)
156         B = 255;
157     else if (B < 0)
158         B = 0;
159 
160     return video->get_color(R, G, B);
161 }
162 
163 // need this so threads can self-schedule work; returns true (and bounds of work) if more work to do
164 
165 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
166 
167 static int sched_nexty;
168 
169 static bool schedule_thread_work(int &y1, int &y2) {
170     pthread_mutex_lock(&MyMutex3);
171 #ifdef STATIC_EVEN_SCHEDULING
172     // optional static-even scheduling
173     y1 = sched_nexty;
174     sched_nexty += ((stopy - starty + 1) / nthreads);
175     y2 = MIN(sched_nexty, stopy);
176 #else
177     // dynamic-chunk scheduling with specified grain_size
178     y1 = sched_nexty;
179     sched_nexty += grain_size;
180     y2 = MIN(sched_nexty, stopy);
181 #endif
182     pthread_mutex_unlock(&MyMutex3);
183     return (y1 <= stopy);
184 }
185 
186 static void parallel_thread(void *arg) {
187     // thread-local storage
188     unsigned int serial = 1;
189     unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
190     unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
191     memset(local_mbox, 0, mboxsize);
192 
193     // int thread_no = (int) arg;
194     int y1, y2;
195     while (schedule_thread_work(y1, y2)) {
196         for (int y = y1; y < y2; y++) {
197             {
198                 drawing_area drawing(startx, totaly - y, stopx - startx, 1);
199                 for (int x = startx; x < stopx; x++) {
200                     color_t c =
201                         render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
202                     drawing.put_pixel(c);
203                 }
204             }
205             if (!video->next_frame())
206                 pthread_exit(arg);
207         }
208     }
209     pthread_exit(arg);
210 }
211 
// need this (for each platform) so we can create the right number of threads, to work efficiently
//
// get_num_cpus() returns the processor count reported by the operating system; exactly
// one of the variants below is compiled.

#if defined(_WIN32)

static int get_num_cpus(void) {
    SYSTEM_INFO si;
    GetNativeSystemInfo(&si);
    return (int)si.dwNumberOfProcessors;
}

#elif defined(__APPLE__)

// sysctl(), CTL_HW and HW_NCPU are declared by the system header <sys/sysctl.h>.
#include <sys/types.h>
#include <sys/sysctl.h>
static int get_num_cpus(void) {
    int name[2] = { CTL_HW, HW_NCPU };
    int ncpu = 0;
    size_t size = sizeof(ncpu);
    // Fall back to a single CPU when the query fails or reports a nonsensical value,
    // instead of returning an uninitialized count.
    if (sysctl(name, 2, &ncpu, &size, nullptr, 0) != 0 || ncpu < 1)
        return 1;
    return ncpu;
}

#else /*  Linux  */

#include <sys/sysinfo.h>
static int get_num_cpus(void) {
    return get_nprocs();
}

#endif
242 
243 void *thread_trace(thr_parms *parms) {
244     // shared but read-only so could be private too
245     all_parms = parms;
246     scene = parms->scene;
247     startx = parms->startx;
248     stopx = parms->stopx;
249     starty = parms->starty;
250     stopy = parms->stopy;
251     jitterscale = 40.0 * (scene.hres + scene.vres);
252     totaly = parms->scene.vres - 1;
253 
254     int n;
255     nthreads = get_num_cpus();
256     char *nthreads_str = getenv("THR_NUM_THREADS");
257     if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
258         nthreads = n;
259     char *grain_str = getenv("THR_GRAINSIZE");
260     if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
261         grain_size = n;
262     pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
263     pthread_mutex_init(&MyMutex, nullptr);
264     pthread_mutex_init(&MyMutex2, nullptr);
265     pthread_mutex_init(&MyMutex3, nullptr);
266     sched_nexty = starty; // initialize schedule_thread_work() self-scheduler
267     for (int i = 0; i < nthreads; i++) {
268         pthread_create(
269             &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
270     }
271     for (int i = 0; i < nthreads; i++) {
272         void *exit_val;
273         pthread_join(threads[i], &exit_val);
274         // expect i = (int) exit_val
275     }
276 
277     return (nullptr);
278 }
279