1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev 
4d86ed7fbStbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev     you may not use this file except in compliance with the License.
6d86ed7fbStbbdev     You may obtain a copy of the License at
7d86ed7fbStbbdev 
8d86ed7fbStbbdev         http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev 
10d86ed7fbStbbdev     Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev     See the License for the specific language governing permissions and
14d86ed7fbStbbdev     limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev 
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev     The original source for this example is
19d86ed7fbStbbdev     Copyright (c) 1994-2008 John E. Stone
20d86ed7fbStbbdev     All rights reserved.
21d86ed7fbStbbdev 
22d86ed7fbStbbdev     Redistribution and use in source and binary forms, with or without
23d86ed7fbStbbdev     modification, are permitted provided that the following conditions
24d86ed7fbStbbdev     are met:
25d86ed7fbStbbdev     1. Redistributions of source code must retain the above copyright
26d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer.
27d86ed7fbStbbdev     2. Redistributions in binary form must reproduce the above copyright
28d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer in the
29d86ed7fbStbbdev        documentation and/or other materials provided with the distribution.
30d86ed7fbStbbdev     3. The name of the author may not be used to endorse or promote products
31d86ed7fbStbbdev        derived from this software without specific prior written permission.
32d86ed7fbStbbdev 
33d86ed7fbStbbdev     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34d86ed7fbStbbdev     OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35d86ed7fbStbbdev     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36d86ed7fbStbbdev     ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37d86ed7fbStbbdev     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38d86ed7fbStbbdev     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39d86ed7fbStbbdev     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40d86ed7fbStbbdev     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41d86ed7fbStbbdev     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42d86ed7fbStbbdev     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43d86ed7fbStbbdev     SUCH DAMAGE.
44d86ed7fbStbbdev */
45d86ed7fbStbbdev 
46d86ed7fbStbbdev #include "machine.hpp"
47d86ed7fbStbbdev #include "types.hpp"
48d86ed7fbStbbdev #include "macros.hpp"
49d86ed7fbStbbdev #include "vector.hpp"
50d86ed7fbStbbdev #include "tgafile.hpp"
51d86ed7fbStbbdev #include "trace.hpp"
52d86ed7fbStbbdev #include "light.hpp"
53d86ed7fbStbbdev #include "shade.hpp"
54d86ed7fbStbbdev #include "camera.hpp"
55d86ed7fbStbbdev #include "util.hpp"
56d86ed7fbStbbdev #include "intersect.hpp"
57d86ed7fbStbbdev #include "global.hpp"
58d86ed7fbStbbdev #include "ui.hpp"
59d86ed7fbStbbdev #include "tachyon_video.hpp"
60d86ed7fbStbbdev 
61d86ed7fbStbbdev // shared but read-only so could be private too
62d86ed7fbStbbdev static thr_parms *all_parms;
63d86ed7fbStbbdev static scenedef scene;
64d86ed7fbStbbdev static int startx;
65d86ed7fbStbbdev static int stopx;
66d86ed7fbStbbdev static int starty;
67d86ed7fbStbbdev static int stopy;
68d86ed7fbStbbdev static flt jitterscale;
69d86ed7fbStbbdev static int totaly;
70d86ed7fbStbbdev static int nthreads;
71d86ed7fbStbbdev 
72d86ed7fbStbbdev static int grain_size = 50;
73d86ed7fbStbbdev 
74d86ed7fbStbbdev #ifdef _WIN32
75d86ed7fbStbbdev #include <windows.h>
76d86ed7fbStbbdev #include "pthread_w.hpp"
77d86ed7fbStbbdev #else
78d86ed7fbStbbdev #include <pthread.h>
79d86ed7fbStbbdev #endif
80d86ed7fbStbbdev 
// All three mutexes are initialized in thread_trace().
static pthread_mutex_t MyMutex; // serializes the rand() calls used for antialiasing jitter
static pthread_mutex_t MyMutex2; // initialized but not otherwise used in this file
static pthread_mutex_t MyMutex3; // guards sched_nexty in schedule_thread_work()
82d86ed7fbStbbdev 
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)83d86ed7fbStbbdev static color_t render_one_pixel(int x,
84d86ed7fbStbbdev                                 int y,
85d86ed7fbStbbdev                                 unsigned int *local_mbox,
86d86ed7fbStbbdev                                 unsigned int &serial,
87d86ed7fbStbbdev                                 int startx,
88d86ed7fbStbbdev                                 int stopx,
89d86ed7fbStbbdev                                 int starty,
90d86ed7fbStbbdev                                 int stopy) {
91d86ed7fbStbbdev     /* private vars moved inside loop */
92d86ed7fbStbbdev     ray primary, sample;
93d86ed7fbStbbdev     color col, avcol;
94d86ed7fbStbbdev     int R, G, B;
95d86ed7fbStbbdev     intersectstruct local_intersections;
96d86ed7fbStbbdev     int alias;
97d86ed7fbStbbdev     /* end private */
98d86ed7fbStbbdev 
99d86ed7fbStbbdev     primary = camray(&scene, x, y);
100d86ed7fbStbbdev     primary.intstruct = &local_intersections;
101d86ed7fbStbbdev     primary.flags = RT_RAY_REGULAR;
102d86ed7fbStbbdev 
103d86ed7fbStbbdev     serial++;
104d86ed7fbStbbdev     primary.serial = serial;
105d86ed7fbStbbdev     primary.mbox = local_mbox;
106d86ed7fbStbbdev     primary.maxdist = FHUGE;
107d86ed7fbStbbdev     primary.scene = &scene;
108d86ed7fbStbbdev     col = trace(&primary);
109d86ed7fbStbbdev 
110d86ed7fbStbbdev     serial = primary.serial;
111d86ed7fbStbbdev 
112d86ed7fbStbbdev     /* perform antialiasing if enabled.. */
113d86ed7fbStbbdev     if (scene.antialiasing > 0) {
114d86ed7fbStbbdev         for (alias = 0; alias < scene.antialiasing; alias++) {
115d86ed7fbStbbdev             serial++; /* increment serial number */
116d86ed7fbStbbdev             sample = primary; /* copy the regular primary ray to start with */
117d86ed7fbStbbdev             sample.serial = serial;
118d86ed7fbStbbdev 
119d86ed7fbStbbdev             {
120d86ed7fbStbbdev                 pthread_mutex_lock(&MyMutex);
121d86ed7fbStbbdev                 sample.d.x += ((rand() % 100) - 50) / jitterscale;
122d86ed7fbStbbdev                 sample.d.y += ((rand() % 100) - 50) / jitterscale;
123d86ed7fbStbbdev                 sample.d.z += ((rand() % 100) - 50) / jitterscale;
124d86ed7fbStbbdev                 pthread_mutex_unlock(&MyMutex);
125d86ed7fbStbbdev             }
126d86ed7fbStbbdev 
127d86ed7fbStbbdev             avcol = trace(&sample);
128d86ed7fbStbbdev 
129d86ed7fbStbbdev             serial = sample.serial; /* update our overall serial # */
130d86ed7fbStbbdev 
131d86ed7fbStbbdev             col.r += avcol.r;
132d86ed7fbStbbdev             col.g += avcol.g;
133d86ed7fbStbbdev             col.b += avcol.b;
134d86ed7fbStbbdev         }
135d86ed7fbStbbdev 
136d86ed7fbStbbdev         col.r /= (scene.antialiasing + 1.0);
137d86ed7fbStbbdev         col.g /= (scene.antialiasing + 1.0);
138d86ed7fbStbbdev         col.b /= (scene.antialiasing + 1.0);
139d86ed7fbStbbdev     }
140d86ed7fbStbbdev 
141d86ed7fbStbbdev     /* Handle overexposure and underexposure here... */
142d86ed7fbStbbdev     R = (int)(col.r * 255);
143d86ed7fbStbbdev     if (R > 255)
144d86ed7fbStbbdev         R = 255;
145d86ed7fbStbbdev     else if (R < 0)
146d86ed7fbStbbdev         R = 0;
147d86ed7fbStbbdev 
148d86ed7fbStbbdev     G = (int)(col.g * 255);
149d86ed7fbStbbdev     if (G > 255)
150d86ed7fbStbbdev         G = 255;
151d86ed7fbStbbdev     else if (G < 0)
152d86ed7fbStbbdev         G = 0;
153d86ed7fbStbbdev 
154d86ed7fbStbbdev     B = (int)(col.b * 255);
155d86ed7fbStbbdev     if (B > 255)
156d86ed7fbStbbdev         B = 255;
157d86ed7fbStbbdev     else if (B < 0)
158d86ed7fbStbbdev         B = 0;
159d86ed7fbStbbdev 
160d86ed7fbStbbdev     return video->get_color(R, G, B);
161d86ed7fbStbbdev }
162d86ed7fbStbbdev 
163d86ed7fbStbbdev // need this so threads can self-schedule work; returns true (and bounds of work) if more work to do
164d86ed7fbStbbdev 
165d86ed7fbStbbdev #define MIN(a, b) (((a) < (b)) ? (a) : (b))
166d86ed7fbStbbdev 
167d86ed7fbStbbdev static int sched_nexty;
168d86ed7fbStbbdev 
schedule_thread_work(int & y1,int & y2)169d86ed7fbStbbdev static bool schedule_thread_work(int &y1, int &y2) {
170d86ed7fbStbbdev     pthread_mutex_lock(&MyMutex3);
171d86ed7fbStbbdev #ifdef STATIC_EVEN_SCHEDULING
172d86ed7fbStbbdev     // optional static-even scheduling
173d86ed7fbStbbdev     y1 = sched_nexty;
174d86ed7fbStbbdev     sched_nexty += ((stopy - starty + 1) / nthreads);
175d86ed7fbStbbdev     y2 = MIN(sched_nexty, stopy);
176d86ed7fbStbbdev #else
177d86ed7fbStbbdev     // dynamic-chunk scheduling with specified grain_size
178d86ed7fbStbbdev     y1 = sched_nexty;
179d86ed7fbStbbdev     sched_nexty += grain_size;
180d86ed7fbStbbdev     y2 = MIN(sched_nexty, stopy);
181d86ed7fbStbbdev #endif
182d86ed7fbStbbdev     pthread_mutex_unlock(&MyMutex3);
183d86ed7fbStbbdev     return (y1 <= stopy);
184d86ed7fbStbbdev }
185d86ed7fbStbbdev 
parallel_thread(void * arg)186d86ed7fbStbbdev static void parallel_thread(void *arg) {
187d86ed7fbStbbdev     // thread-local storage
188d86ed7fbStbbdev     unsigned int serial = 1;
189d86ed7fbStbbdev     unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
190d86ed7fbStbbdev     unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
191d86ed7fbStbbdev     memset(local_mbox, 0, mboxsize);
192d86ed7fbStbbdev 
193d86ed7fbStbbdev     // int thread_no = (int) arg;
194d86ed7fbStbbdev     int y1, y2;
195d86ed7fbStbbdev     while (schedule_thread_work(y1, y2)) {
196d86ed7fbStbbdev         for (int y = y1; y < y2; y++) {
197d86ed7fbStbbdev             {
198d86ed7fbStbbdev                 drawing_area drawing(startx, totaly - y, stopx - startx, 1);
199d86ed7fbStbbdev                 for (int x = startx; x < stopx; x++) {
200d86ed7fbStbbdev                     color_t c =
201d86ed7fbStbbdev                         render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
202d86ed7fbStbbdev                     drawing.put_pixel(c);
203d86ed7fbStbbdev                 }
204d86ed7fbStbbdev             }
205d86ed7fbStbbdev             if (!video->next_frame())
206d86ed7fbStbbdev                 pthread_exit(arg);
207d86ed7fbStbbdev         }
208d86ed7fbStbbdev     }
209d86ed7fbStbbdev     pthread_exit(arg);
210d86ed7fbStbbdev }
211d86ed7fbStbbdev 
212d86ed7fbStbbdev // need this (for each platform) so we can create the right number of threads, to work efficiently
213d86ed7fbStbbdev 
214d86ed7fbStbbdev #if defined(_WIN32)
215d86ed7fbStbbdev 
get_num_cpus(void)216d86ed7fbStbbdev static int get_num_cpus(void) {
217d86ed7fbStbbdev     SYSTEM_INFO si;
218d86ed7fbStbbdev     GetNativeSystemInfo(&si);
219d86ed7fbStbbdev     return (int)si.dwNumberOfProcessors;
220d86ed7fbStbbdev }
221d86ed7fbStbbdev 
222d86ed7fbStbbdev #elif defined(__APPLE__)
223d86ed7fbStbbdev 
224d86ed7fbStbbdev #include "sys/types.hpp"
225d86ed7fbStbbdev #include "sys/sysctl.hpp"
get_num_cpus(void)226d86ed7fbStbbdev static int get_num_cpus(void) {
227d86ed7fbStbbdev     int name[2] = { CTL_HW, HW_NCPU };
228d86ed7fbStbbdev     int ncpu;
229d86ed7fbStbbdev     std::size_t size = sizeof(ncpu);
230d86ed7fbStbbdev     sysctl(name, 2, &ncpu, &size, nullptr, 0);
231d86ed7fbStbbdev     return ncpu;
232d86ed7fbStbbdev }
233d86ed7fbStbbdev 
234d86ed7fbStbbdev #else /*  Linux  */
235d86ed7fbStbbdev 
236d86ed7fbStbbdev #include <sys/sysinfo.h>
// Linux: returns the processor count reported by get_nprocs().
static int get_num_cpus(void) {
    const int available_cpus = get_nprocs();
    return available_cpus;
}
240d86ed7fbStbbdev 
241d86ed7fbStbbdev #endif
242d86ed7fbStbbdev 
thread_trace(thr_parms * parms)243d86ed7fbStbbdev void *thread_trace(thr_parms *parms) {
244d86ed7fbStbbdev     // shared but read-only so could be private too
245d86ed7fbStbbdev     all_parms = parms;
246d86ed7fbStbbdev     scene = parms->scene;
247d86ed7fbStbbdev     startx = parms->startx;
248d86ed7fbStbbdev     stopx = parms->stopx;
249d86ed7fbStbbdev     starty = parms->starty;
250d86ed7fbStbbdev     stopy = parms->stopy;
251d86ed7fbStbbdev     jitterscale = 40.0 * (scene.hres + scene.vres);
252d86ed7fbStbbdev     totaly = parms->scene.vres - 1;
253d86ed7fbStbbdev 
254d86ed7fbStbbdev     int n;
255d86ed7fbStbbdev     nthreads = get_num_cpus();
256d86ed7fbStbbdev     char *nthreads_str = getenv("THR_NUM_THREADS");
257d86ed7fbStbbdev     if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
258d86ed7fbStbbdev         nthreads = n;
259d86ed7fbStbbdev     char *grain_str = getenv("THR_GRAINSIZE");
260d86ed7fbStbbdev     if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
261d86ed7fbStbbdev         grain_size = n;
262d86ed7fbStbbdev     pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
263d86ed7fbStbbdev     pthread_mutex_init(&MyMutex, nullptr);
264d86ed7fbStbbdev     pthread_mutex_init(&MyMutex2, nullptr);
265d86ed7fbStbbdev     pthread_mutex_init(&MyMutex3, nullptr);
266d86ed7fbStbbdev     sched_nexty = starty; // initialize schedule_thread_work() self-scheduler
267d86ed7fbStbbdev     for (int i = 0; i < nthreads; i++) {
268d86ed7fbStbbdev         pthread_create(
269d86ed7fbStbbdev             &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
270d86ed7fbStbbdev     }
271d86ed7fbStbbdev     for (int i = 0; i < nthreads; i++) {
272d86ed7fbStbbdev         void *exit_val;
273d86ed7fbStbbdev         pthread_join(threads[i], &exit_val);
274d86ed7fbStbbdev         // expect i = (int) exit_val
275d86ed7fbStbbdev     }
276d86ed7fbStbbdev 
277d86ed7fbStbbdev     return (nullptr);
278d86ed7fbStbbdev }
279