1 /* 2 Copyright (c) 2005-2020 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 The original source for this example is 19 Copyright (c) 1994-2008 John E. Stone 20 All rights reserved. 21 22 Redistribution and use in source and binary forms, with or without 23 modification, are permitted provided that the following conditions 24 are met: 25 1. Redistributions of source code must retain the above copyright 26 notice, this list of conditions and the following disclaimer. 27 2. Redistributions in binary form must reproduce the above copyright 28 notice, this list of conditions and the following disclaimer in the 29 documentation and/or other materials provided with the distribution. 30 3. The name of the author may not be used to endorse or promote products 31 derived from this software without specific prior written permission. 32 33 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 34 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 36 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 37 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 38 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 39 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 41 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 42 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 43 SUCH DAMAGE. 44 */ 45 46 #include "machine.hpp" 47 #include "types.hpp" 48 #include "macros.hpp" 49 #include "vector.hpp" 50 #include "tgafile.hpp" 51 #include "trace.hpp" 52 #include "light.hpp" 53 #include "shade.hpp" 54 #include "camera.hpp" 55 #include "util.hpp" 56 #include "intersect.hpp" 57 #include "global.hpp" 58 #include "ui.hpp" 59 #include "tachyon_video.hpp" 60 61 // shared but read-only so could be private too 62 static thr_parms *all_parms; 63 static scenedef scene; 64 static int startx; 65 static int stopx; 66 static int starty; 67 static int stopy; 68 static flt jitterscale; 69 static int totaly; 70 static int nthreads; 71 72 static int grain_size = 50; 73 const int DIVFACTOR = 2; 74 75 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 76 77 #ifdef _WIN32 78 #include <windows.h> 79 #include "pthread_w.hpp" 80 #else 81 #include <pthread.h> 82 #endif 83 84 static pthread_mutex_t MyMutex, MyMutex2, MyMutex3; 85 86 static color_t render_one_pixel(int x, 87 int y, 88 unsigned int *local_mbox, 89 unsigned int &serial, 90 int startx, 91 int stopx, 92 int starty, 93 int stopy) { 94 /* private vars moved inside loop */ 95 ray primary, sample; 96 color col, avcol; 97 int R, G, B; 98 intersectstruct local_intersections; 99 int alias; 100 /* end private */ 101 102 primary = camray(&scene, x, y); 103 primary.intstruct = &local_intersections; 104 primary.flags = RT_RAY_REGULAR; 105 106 serial++; 107 primary.serial = serial; 108 primary.mbox = local_mbox; 109 primary.maxdist = FHUGE; 110 primary.scene = &scene; 111 col = trace(&primary); 112 113 serial = primary.serial; 114 115 /* perform antialiasing if enabled.. */ 116 if (scene.antialiasing > 0) { 117 for (alias = 0; alias < scene.antialiasing; alias++) { 118 serial++; /* increment serial number */ 119 sample = primary; /* copy the regular primary ray to start with */ 120 sample.serial = serial; 121 122 { 123 pthread_mutex_lock(&MyMutex); 124 sample.d.x += ((rand() % 100) - 50) / jitterscale; 125 sample.d.y += ((rand() % 100) - 50) / jitterscale; 126 sample.d.z += ((rand() % 100) - 50) / jitterscale; 127 pthread_mutex_unlock(&MyMutex); 128 } 129 130 avcol = trace(&sample); 131 132 serial = sample.serial; /* update our overall serial # */ 133 134 col.r += avcol.r; 135 col.g += avcol.g; 136 col.b += avcol.b; 137 } 138 139 col.r /= (scene.antialiasing + 1.0); 140 col.g /= (scene.antialiasing + 1.0); 141 col.b /= (scene.antialiasing + 1.0); 142 } 143 144 /* Handle overexposure and underexposure here... */ 145 R = (int)(col.r * 255); 146 if (R > 255) 147 R = 255; 148 else if (R < 0) 149 R = 0; 150 151 G = (int)(col.g * 255); 152 if (G > 255) 153 G = 255; 154 else if (G < 0) 155 G = 0; 156 157 B = (int)(col.b * 255); 158 if (B > 255) 159 B = 255; 160 else if (B < 0) 161 B = 0; 162 163 return video->get_color(R, G, B); 164 } 165 166 // need this so threads can self-schedule work; returns true (and bounds of work) if more work to do 167 168 typedef struct work_queue_entry_s { 169 patch pch; 170 struct work_queue_entry_s *next; 171 } work_queue_entry_t; 172 static work_queue_entry_t *work_queue_head = nullptr; 173 static work_queue_entry_t *work_queue_tail = nullptr; 174 175 static void generate_work(patch *pchin) { 176 int startx, stopx, starty, stopy; 177 int xs, ys; 178 179 startx = pchin->startx; 180 stopx = pchin->stopx; 181 starty = pchin->starty; 182 stopy = pchin->stopy; 183 184 if (((stopx - startx) >= grain_size) || ((stopy - starty) >= grain_size)) { 185 int xpatchsize = (stopx - startx) / DIVFACTOR + 1; 186 int ypatchsize = (stopy - starty) / DIVFACTOR + 1; 187 for (ys = starty; ys <= stopy; ys += ypatchsize) 188 for (xs = startx; xs <= stopx; xs += xpatchsize) { 189 patch pch; 190 pch.startx = xs; 191 pch.starty = ys; 192 pch.stopx = MIN(xs + xpatchsize, stopx); 193 pch.stopy = MIN(ys + ypatchsize, stopy); 194 195 generate_work(&pch); 196 } 197 } 198 else { 199 /* just trace this patch */ 200 work_queue_entry_t *q = (work_queue_entry_t *)malloc(sizeof(work_queue_entry_t)); 201 q->pch.starty = starty; 202 q->pch.stopy = stopy; 203 q->pch.startx = startx; 204 q->pch.stopx = stopx; 205 q->next = nullptr; 206 if (work_queue_head == nullptr) { 207 work_queue_head = q; 208 } 209 else { 210 work_queue_tail->next = q; 211 } 212 work_queue_tail = q; 213 } 214 } 215 216 static void generate_worklist(void) { 217 patch pch; 218 pch.startx = startx; 219 pch.stopx = stopx; 220 pch.starty = starty; 221 pch.stopy = stopy; 222 generate_work(&pch); 223 } 224 225 static bool schedule_thread_work(patch &pch) { 226 pthread_mutex_lock(&MyMutex3); 227 work_queue_entry_t *q = work_queue_head; 228 if (q != nullptr) { 229 pch = q->pch; 230 work_queue_head = work_queue_head->next; 231 } 232 pthread_mutex_unlock(&MyMutex3); 233 return (q != nullptr); 234 } 235 236 static void parallel_thread(void *arg) { 237 // thread-local storage 238 unsigned int serial = 1; 239 unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20); 240 unsigned int *local_mbox = (unsigned int *)alloca(mboxsize); 241 memset(local_mbox, 0, mboxsize); 242 243 // int thread_no = (int) arg; 244 patch pch; 245 while (schedule_thread_work(pch)) { 246 { 247 drawing_area drawing( 248 pch.startx, totaly - pch.stopy, pch.stopx - pch.startx, pch.stopy - pch.starty); 249 for (int i = 1, y = pch.starty; y < pch.stopy; ++y, i++) { 250 drawing.set_pos(0, drawing.size_y - i); 251 for (int x = pch.startx; x < pch.stopx; x++) { 252 color_t c = 253 render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy); 254 drawing.put_pixel(c); 255 } 256 } 257 } 258 if (!video->next_frame()) 259 pthread_exit(arg); 260 } 261 pthread_exit(arg); 262 } 263 264 // need this (for each platform) so we can create the right number of threads, to work efficiently 265 266 #if defined(_WIN32) 267 268 static int get_num_cpus(void) { 269 SYSTEM_INFO si; 270 GetNativeSystemInfo(&si); 271 return (int)si.dwNumberOfProcessors; 272 } 273 274 #elif defined(__APPLE__) 275 276 #include "sys/types.hpp" 277 #include "sys/sysctl.hpp" 278 static int get_num_cpus(void) { 279 int name[2] = { CTL_HW, HW_NCPU }; 280 int ncpu; 281 std::size_t size = sizeof(ncpu); 282 sysctl(name, 2, &ncpu, &size, nullptr, 0); 283 return ncpu; 284 } 285 286 #else /* Linux */ 287 288 #include <sys/sysinfo.h> 289 static int get_num_cpus(void) { 290 return get_nprocs(); 291 } 292 293 #endif 294 295 void *thread_trace(thr_parms *parms) { 296 // shared but read-only so could be private too 297 all_parms = parms; 298 scene = parms->scene; 299 startx = parms->startx; 300 stopx = parms->stopx; 301 starty = parms->starty; 302 stopy = parms->stopy; 303 jitterscale = 40.0 * (scene.hres + scene.vres); 304 totaly = parms->scene.vres; 305 306 int n; 307 nthreads = get_num_cpus(); 308 char *nthreads_str = getenv("THR_NUM_THREADS"); 309 if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0)) 310 nthreads = n; 311 char *grain_str = getenv("THR_GRAINSIZE"); 312 if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0)) 313 grain_size = n; 314 pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t)); 315 pthread_mutex_init(&MyMutex, nullptr); 316 pthread_mutex_init(&MyMutex2, nullptr); 317 pthread_mutex_init(&MyMutex3, nullptr); 318 generate_worklist(); // initialize schedule_thread_work() self-scheduler 319 for (int i = 0; i < nthreads; i++) { 320 pthread_create( 321 &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i)); 322 } 323 for (int i = 0; i < nthreads; i++) { 324 void *exit_val; 325 pthread_join(threads[i], &exit_val); 326 // expect i = (int) exit_val 327 } 328 329 return (nullptr); 330 } 331