1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev 
4d86ed7fbStbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev     you may not use this file except in compliance with the License.
6d86ed7fbStbbdev     You may obtain a copy of the License at
7d86ed7fbStbbdev 
8d86ed7fbStbbdev         http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev 
10d86ed7fbStbbdev     Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev     See the License for the specific language governing permissions and
14d86ed7fbStbbdev     limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev 
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev     The original source for this example is
19d86ed7fbStbbdev     Copyright (c) 1994-2008 John E. Stone
20d86ed7fbStbbdev     All rights reserved.
21d86ed7fbStbbdev 
22d86ed7fbStbbdev     Redistribution and use in source and binary forms, with or without
23d86ed7fbStbbdev     modification, are permitted provided that the following conditions
24d86ed7fbStbbdev     are met:
25d86ed7fbStbbdev     1. Redistributions of source code must retain the above copyright
26d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer.
27d86ed7fbStbbdev     2. Redistributions in binary form must reproduce the above copyright
28d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer in the
29d86ed7fbStbbdev        documentation and/or other materials provided with the distribution.
30d86ed7fbStbbdev     3. The name of the author may not be used to endorse or promote products
31d86ed7fbStbbdev        derived from this software without specific prior written permission.
32d86ed7fbStbbdev 
33d86ed7fbStbbdev     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34d86ed7fbStbbdev     OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35d86ed7fbStbbdev     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36d86ed7fbStbbdev     ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37d86ed7fbStbbdev     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38d86ed7fbStbbdev     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39d86ed7fbStbbdev     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40d86ed7fbStbbdev     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41d86ed7fbStbbdev     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42d86ed7fbStbbdev     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43d86ed7fbStbbdev     SUCH DAMAGE.
44d86ed7fbStbbdev */
45d86ed7fbStbbdev 
46d86ed7fbStbbdev #include "machine.hpp"
47d86ed7fbStbbdev #include "types.hpp"
48d86ed7fbStbbdev #include "macros.hpp"
49d86ed7fbStbbdev #include "vector.hpp"
50d86ed7fbStbbdev #include "tgafile.hpp"
51d86ed7fbStbbdev #include "trace.hpp"
52d86ed7fbStbbdev #include "light.hpp"
53d86ed7fbStbbdev #include "shade.hpp"
54d86ed7fbStbbdev #include "camera.hpp"
55d86ed7fbStbbdev #include "util.hpp"
56d86ed7fbStbbdev #include "intersect.hpp"
57d86ed7fbStbbdev #include "global.hpp"
58d86ed7fbStbbdev #include "ui.hpp"
59d86ed7fbStbbdev #include "tachyon_video.hpp"
60d86ed7fbStbbdev 
61d86ed7fbStbbdev // shared but read-only so could be private too
62d86ed7fbStbbdev static thr_parms *all_parms;
63d86ed7fbStbbdev static scenedef scene;
64d86ed7fbStbbdev static int startx;
65d86ed7fbStbbdev static int stopx;
66d86ed7fbStbbdev static int starty;
67d86ed7fbStbbdev static int stopy;
68d86ed7fbStbbdev static flt jitterscale;
69d86ed7fbStbbdev static int totaly;
70d86ed7fbStbbdev static int nthreads;
71d86ed7fbStbbdev 
72d86ed7fbStbbdev static int grain_size = 50;
73d86ed7fbStbbdev const int DIVFACTOR = 2;
74d86ed7fbStbbdev 
75d86ed7fbStbbdev #define MIN(a, b) ((a) < (b) ? (a) : (b))
76d86ed7fbStbbdev 
77d86ed7fbStbbdev #ifdef _WIN32
78d86ed7fbStbbdev #include <windows.h>
79d86ed7fbStbbdev #include "pthread_w.hpp"
80d86ed7fbStbbdev #else
81d86ed7fbStbbdev #include <pthread.h>
82d86ed7fbStbbdev #endif
83d86ed7fbStbbdev 
static pthread_mutex_t MyMutex; // serializes the rand() calls in render_one_pixel()
static pthread_mutex_t MyMutex2; // initialized by thread_trace(); not locked in the code visible here
static pthread_mutex_t MyMutex3; // protects the work queue in schedule_thread_work()
85d86ed7fbStbbdev 
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)86d86ed7fbStbbdev static color_t render_one_pixel(int x,
87d86ed7fbStbbdev                                 int y,
88d86ed7fbStbbdev                                 unsigned int *local_mbox,
89d86ed7fbStbbdev                                 unsigned int &serial,
90d86ed7fbStbbdev                                 int startx,
91d86ed7fbStbbdev                                 int stopx,
92d86ed7fbStbbdev                                 int starty,
93d86ed7fbStbbdev                                 int stopy) {
94d86ed7fbStbbdev     /* private vars moved inside loop */
95d86ed7fbStbbdev     ray primary, sample;
96d86ed7fbStbbdev     color col, avcol;
97d86ed7fbStbbdev     int R, G, B;
98d86ed7fbStbbdev     intersectstruct local_intersections;
99d86ed7fbStbbdev     int alias;
100d86ed7fbStbbdev     /* end private */
101d86ed7fbStbbdev 
102d86ed7fbStbbdev     primary = camray(&scene, x, y);
103d86ed7fbStbbdev     primary.intstruct = &local_intersections;
104d86ed7fbStbbdev     primary.flags = RT_RAY_REGULAR;
105d86ed7fbStbbdev 
106d86ed7fbStbbdev     serial++;
107d86ed7fbStbbdev     primary.serial = serial;
108d86ed7fbStbbdev     primary.mbox = local_mbox;
109d86ed7fbStbbdev     primary.maxdist = FHUGE;
110d86ed7fbStbbdev     primary.scene = &scene;
111d86ed7fbStbbdev     col = trace(&primary);
112d86ed7fbStbbdev 
113d86ed7fbStbbdev     serial = primary.serial;
114d86ed7fbStbbdev 
115d86ed7fbStbbdev     /* perform antialiasing if enabled.. */
116d86ed7fbStbbdev     if (scene.antialiasing > 0) {
117d86ed7fbStbbdev         for (alias = 0; alias < scene.antialiasing; alias++) {
118d86ed7fbStbbdev             serial++; /* increment serial number */
119d86ed7fbStbbdev             sample = primary; /* copy the regular primary ray to start with */
120d86ed7fbStbbdev             sample.serial = serial;
121d86ed7fbStbbdev 
122d86ed7fbStbbdev             {
123d86ed7fbStbbdev                 pthread_mutex_lock(&MyMutex);
124d86ed7fbStbbdev                 sample.d.x += ((rand() % 100) - 50) / jitterscale;
125d86ed7fbStbbdev                 sample.d.y += ((rand() % 100) - 50) / jitterscale;
126d86ed7fbStbbdev                 sample.d.z += ((rand() % 100) - 50) / jitterscale;
127d86ed7fbStbbdev                 pthread_mutex_unlock(&MyMutex);
128d86ed7fbStbbdev             }
129d86ed7fbStbbdev 
130d86ed7fbStbbdev             avcol = trace(&sample);
131d86ed7fbStbbdev 
132d86ed7fbStbbdev             serial = sample.serial; /* update our overall serial # */
133d86ed7fbStbbdev 
134d86ed7fbStbbdev             col.r += avcol.r;
135d86ed7fbStbbdev             col.g += avcol.g;
136d86ed7fbStbbdev             col.b += avcol.b;
137d86ed7fbStbbdev         }
138d86ed7fbStbbdev 
139d86ed7fbStbbdev         col.r /= (scene.antialiasing + 1.0);
140d86ed7fbStbbdev         col.g /= (scene.antialiasing + 1.0);
141d86ed7fbStbbdev         col.b /= (scene.antialiasing + 1.0);
142d86ed7fbStbbdev     }
143d86ed7fbStbbdev 
144d86ed7fbStbbdev     /* Handle overexposure and underexposure here... */
145d86ed7fbStbbdev     R = (int)(col.r * 255);
146d86ed7fbStbbdev     if (R > 255)
147d86ed7fbStbbdev         R = 255;
148d86ed7fbStbbdev     else if (R < 0)
149d86ed7fbStbbdev         R = 0;
150d86ed7fbStbbdev 
151d86ed7fbStbbdev     G = (int)(col.g * 255);
152d86ed7fbStbbdev     if (G > 255)
153d86ed7fbStbbdev         G = 255;
154d86ed7fbStbbdev     else if (G < 0)
155d86ed7fbStbbdev         G = 0;
156d86ed7fbStbbdev 
157d86ed7fbStbbdev     B = (int)(col.b * 255);
158d86ed7fbStbbdev     if (B > 255)
159d86ed7fbStbbdev         B = 255;
160d86ed7fbStbbdev     else if (B < 0)
161d86ed7fbStbbdev         B = 0;
162d86ed7fbStbbdev 
163d86ed7fbStbbdev     return video->get_color(R, G, B);
164d86ed7fbStbbdev }
165d86ed7fbStbbdev 
166d86ed7fbStbbdev // need this so threads can self-schedule work; returns true (and bounds of work) if more work to do
167d86ed7fbStbbdev 
168d86ed7fbStbbdev typedef struct work_queue_entry_s {
169d86ed7fbStbbdev     patch pch;
170d86ed7fbStbbdev     struct work_queue_entry_s *next;
171d86ed7fbStbbdev } work_queue_entry_t;
172d86ed7fbStbbdev static work_queue_entry_t *work_queue_head = nullptr;
173d86ed7fbStbbdev static work_queue_entry_t *work_queue_tail = nullptr;
174d86ed7fbStbbdev 
generate_work(patch * pchin)175d86ed7fbStbbdev static void generate_work(patch *pchin) {
176d86ed7fbStbbdev     int startx, stopx, starty, stopy;
177d86ed7fbStbbdev     int xs, ys;
178d86ed7fbStbbdev 
179d86ed7fbStbbdev     startx = pchin->startx;
180d86ed7fbStbbdev     stopx = pchin->stopx;
181d86ed7fbStbbdev     starty = pchin->starty;
182d86ed7fbStbbdev     stopy = pchin->stopy;
183d86ed7fbStbbdev 
184d86ed7fbStbbdev     if (((stopx - startx) >= grain_size) || ((stopy - starty) >= grain_size)) {
185d86ed7fbStbbdev         int xpatchsize = (stopx - startx) / DIVFACTOR + 1;
186d86ed7fbStbbdev         int ypatchsize = (stopy - starty) / DIVFACTOR + 1;
187d86ed7fbStbbdev         for (ys = starty; ys <= stopy; ys += ypatchsize)
188d86ed7fbStbbdev             for (xs = startx; xs <= stopx; xs += xpatchsize) {
189d86ed7fbStbbdev                 patch pch;
190d86ed7fbStbbdev                 pch.startx = xs;
191d86ed7fbStbbdev                 pch.starty = ys;
192d86ed7fbStbbdev                 pch.stopx = MIN(xs + xpatchsize, stopx);
193d86ed7fbStbbdev                 pch.stopy = MIN(ys + ypatchsize, stopy);
194d86ed7fbStbbdev 
195d86ed7fbStbbdev                 generate_work(&pch);
196d86ed7fbStbbdev             }
197d86ed7fbStbbdev     }
198d86ed7fbStbbdev     else {
199d86ed7fbStbbdev         /* just trace this patch */
200d86ed7fbStbbdev         work_queue_entry_t *q = (work_queue_entry_t *)malloc(sizeof(work_queue_entry_t));
201d86ed7fbStbbdev         q->pch.starty = starty;
202d86ed7fbStbbdev         q->pch.stopy = stopy;
203d86ed7fbStbbdev         q->pch.startx = startx;
204d86ed7fbStbbdev         q->pch.stopx = stopx;
205d86ed7fbStbbdev         q->next = nullptr;
206d86ed7fbStbbdev         if (work_queue_head == nullptr) {
207d86ed7fbStbbdev             work_queue_head = q;
208d86ed7fbStbbdev         }
209d86ed7fbStbbdev         else {
210d86ed7fbStbbdev             work_queue_tail->next = q;
211d86ed7fbStbbdev         }
212d86ed7fbStbbdev         work_queue_tail = q;
213d86ed7fbStbbdev     }
214d86ed7fbStbbdev }
215d86ed7fbStbbdev 
generate_worklist(void)216d86ed7fbStbbdev static void generate_worklist(void) {
217d86ed7fbStbbdev     patch pch;
218d86ed7fbStbbdev     pch.startx = startx;
219d86ed7fbStbbdev     pch.stopx = stopx;
220d86ed7fbStbbdev     pch.starty = starty;
221d86ed7fbStbbdev     pch.stopy = stopy;
222d86ed7fbStbbdev     generate_work(&pch);
223d86ed7fbStbbdev }
224d86ed7fbStbbdev 
schedule_thread_work(patch & pch)225d86ed7fbStbbdev static bool schedule_thread_work(patch &pch) {
226d86ed7fbStbbdev     pthread_mutex_lock(&MyMutex3);
227d86ed7fbStbbdev     work_queue_entry_t *q = work_queue_head;
228d86ed7fbStbbdev     if (q != nullptr) {
229d86ed7fbStbbdev         pch = q->pch;
230d86ed7fbStbbdev         work_queue_head = work_queue_head->next;
231d86ed7fbStbbdev     }
232d86ed7fbStbbdev     pthread_mutex_unlock(&MyMutex3);
233d86ed7fbStbbdev     return (q != nullptr);
234d86ed7fbStbbdev }
235d86ed7fbStbbdev 
parallel_thread(void * arg)236d86ed7fbStbbdev static void parallel_thread(void *arg) {
237d86ed7fbStbbdev     // thread-local storage
238d86ed7fbStbbdev     unsigned int serial = 1;
239d86ed7fbStbbdev     unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
240d86ed7fbStbbdev     unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
241d86ed7fbStbbdev     memset(local_mbox, 0, mboxsize);
242d86ed7fbStbbdev 
243d86ed7fbStbbdev     // int thread_no = (int) arg;
244d86ed7fbStbbdev     patch pch;
245d86ed7fbStbbdev     while (schedule_thread_work(pch)) {
246d86ed7fbStbbdev         {
247d86ed7fbStbbdev             drawing_area drawing(
248d86ed7fbStbbdev                 pch.startx, totaly - pch.stopy, pch.stopx - pch.startx, pch.stopy - pch.starty);
249d86ed7fbStbbdev             for (int i = 1, y = pch.starty; y < pch.stopy; ++y, i++) {
250d86ed7fbStbbdev                 drawing.set_pos(0, drawing.size_y - i);
251d86ed7fbStbbdev                 for (int x = pch.startx; x < pch.stopx; x++) {
252d86ed7fbStbbdev                     color_t c =
253d86ed7fbStbbdev                         render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
254d86ed7fbStbbdev                     drawing.put_pixel(c);
255d86ed7fbStbbdev                 }
256d86ed7fbStbbdev             }
257d86ed7fbStbbdev         }
258d86ed7fbStbbdev         if (!video->next_frame())
259d86ed7fbStbbdev             pthread_exit(arg);
260d86ed7fbStbbdev     }
261d86ed7fbStbbdev     pthread_exit(arg);
262d86ed7fbStbbdev }
263d86ed7fbStbbdev 
264d86ed7fbStbbdev // need this (for each platform) so we can create the right number of threads, to work efficiently
265d86ed7fbStbbdev 
266d86ed7fbStbbdev #if defined(_WIN32)
267d86ed7fbStbbdev 
// Windows: processor count as reported by GetNativeSystemInfo().
static int get_num_cpus(void) {
    SYSTEM_INFO sysinfo;
    GetNativeSystemInfo(&sysinfo);
    const DWORD processor_count = sysinfo.dwNumberOfProcessors;
    return (int)processor_count;
}
273d86ed7fbStbbdev 
274d86ed7fbStbbdev #elif defined(__APPLE__)
275d86ed7fbStbbdev 
// System headers: these are <sys/types.h> and <sys/sysctl.h>; there is no
// ".hpp" variant of either.
#include <sys/types.h>
#include <sys/sysctl.h>
// macOS: processor count from the hw.ncpu sysctl.
// Returns 1 if the query fails or reports a count below 1, so callers always
// get a usable thread count.
static int get_num_cpus(void) {
    int name[2] = { CTL_HW, HW_NCPU };
    int ncpu = 0;
    std::size_t size = sizeof(ncpu);
    if (sysctl(name, 2, &ncpu, &size, nullptr, 0) != 0 || ncpu < 1)
        return 1;
    return ncpu;
}
285d86ed7fbStbbdev 
286d86ed7fbStbbdev #else /*  Linux  */
287d86ed7fbStbbdev 
288d86ed7fbStbbdev #include <sys/sysinfo.h>
// Linux (and any other platform): processor count as reported by get_nprocs().
static int get_num_cpus(void) {
    const int available_cpus = get_nprocs();
    return available_cpus;
}
292d86ed7fbStbbdev 
293d86ed7fbStbbdev #endif
294d86ed7fbStbbdev 
thread_trace(thr_parms * parms)295d86ed7fbStbbdev void *thread_trace(thr_parms *parms) {
296d86ed7fbStbbdev     // shared but read-only so could be private too
297d86ed7fbStbbdev     all_parms = parms;
298d86ed7fbStbbdev     scene = parms->scene;
299d86ed7fbStbbdev     startx = parms->startx;
300d86ed7fbStbbdev     stopx = parms->stopx;
301d86ed7fbStbbdev     starty = parms->starty;
302d86ed7fbStbbdev     stopy = parms->stopy;
303d86ed7fbStbbdev     jitterscale = 40.0 * (scene.hres + scene.vres);
304d86ed7fbStbbdev     totaly = parms->scene.vres;
305d86ed7fbStbbdev 
306d86ed7fbStbbdev     int n;
307d86ed7fbStbbdev     nthreads = get_num_cpus();
308d86ed7fbStbbdev     char *nthreads_str = getenv("THR_NUM_THREADS");
309d86ed7fbStbbdev     if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
310d86ed7fbStbbdev         nthreads = n;
311d86ed7fbStbbdev     char *grain_str = getenv("THR_GRAINSIZE");
312d86ed7fbStbbdev     if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
313d86ed7fbStbbdev         grain_size = n;
314d86ed7fbStbbdev     pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
315d86ed7fbStbbdev     pthread_mutex_init(&MyMutex, nullptr);
316d86ed7fbStbbdev     pthread_mutex_init(&MyMutex2, nullptr);
317d86ed7fbStbbdev     pthread_mutex_init(&MyMutex3, nullptr);
318d86ed7fbStbbdev     generate_worklist(); // initialize schedule_thread_work() self-scheduler
319d86ed7fbStbbdev     for (int i = 0; i < nthreads; i++) {
320d86ed7fbStbbdev         pthread_create(
321d86ed7fbStbbdev             &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
322d86ed7fbStbbdev     }
323d86ed7fbStbbdev     for (int i = 0; i < nthreads; i++) {
324d86ed7fbStbbdev         void *exit_val;
325d86ed7fbStbbdev         pthread_join(threads[i], &exit_val);
326d86ed7fbStbbdev         // expect i = (int) exit_val
327d86ed7fbStbbdev     }
328d86ed7fbStbbdev 
329d86ed7fbStbbdev     return (nullptr);
330d86ed7fbStbbdev }
331