1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev 
4d86ed7fbStbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev     you may not use this file except in compliance with the License.
6d86ed7fbStbbdev     You may obtain a copy of the License at
7d86ed7fbStbbdev 
8d86ed7fbStbbdev         http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev 
10d86ed7fbStbbdev     Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev     See the License for the specific language governing permissions and
14d86ed7fbStbbdev     limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev 
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev     The original source for this example is
19d86ed7fbStbbdev     Copyright (c) 1994-2008 John E. Stone
20d86ed7fbStbbdev     All rights reserved.
21d86ed7fbStbbdev 
22d86ed7fbStbbdev     Redistribution and use in source and binary forms, with or without
23d86ed7fbStbbdev     modification, are permitted provided that the following conditions
24d86ed7fbStbbdev     are met:
25d86ed7fbStbbdev     1. Redistributions of source code must retain the above copyright
26d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer.
27d86ed7fbStbbdev     2. Redistributions in binary form must reproduce the above copyright
28d86ed7fbStbbdev        notice, this list of conditions and the following disclaimer in the
29d86ed7fbStbbdev        documentation and/or other materials provided with the distribution.
30d86ed7fbStbbdev     3. The name of the author may not be used to endorse or promote products
31d86ed7fbStbbdev        derived from this software without specific prior written permission.
32d86ed7fbStbbdev 
33d86ed7fbStbbdev     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34d86ed7fbStbbdev     OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35d86ed7fbStbbdev     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36d86ed7fbStbbdev     ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37d86ed7fbStbbdev     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38d86ed7fbStbbdev     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39d86ed7fbStbbdev     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40d86ed7fbStbbdev     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41d86ed7fbStbbdev     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42d86ed7fbStbbdev     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43d86ed7fbStbbdev     SUCH DAMAGE.
44d86ed7fbStbbdev */
45d86ed7fbStbbdev 
#include <errno.h>
#include <limits.h>
#include <omp.h>
#include <stdlib.h>
#include <string.h>

#include "machine.hpp"
#include "types.hpp"
#include "macros.hpp"
#include "vector.hpp"
#include "tgafile.hpp"
#include "trace.hpp"
#include "light.hpp"
#include "shade.hpp"
#include "camera.hpp"
#include "util.hpp"
#include "intersect.hpp"
#include "global.hpp"
#include "ui.hpp"
#include "tachyon_video.hpp"
63d86ed7fbStbbdev 
64d86ed7fbStbbdev // shared but read-only so could be private too
65d86ed7fbStbbdev static thr_parms *all_parms;
66d86ed7fbStbbdev static scenedef scene;
67d86ed7fbStbbdev static int startx;
68d86ed7fbStbbdev static int stopx;
69d86ed7fbStbbdev static int starty;
70d86ed7fbStbbdev static int stopy;
71d86ed7fbStbbdev static flt jitterscale;
72d86ed7fbStbbdev static int totaly, totalx;
73d86ed7fbStbbdev 
74d86ed7fbStbbdev static int grain_size = 50;
75d86ed7fbStbbdev const int DIVFACTOR = 2;
76d86ed7fbStbbdev 
77d86ed7fbStbbdev #define MIN(a, b) ((a) < (b) ? (a) : (b))
78d86ed7fbStbbdev 
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)79d86ed7fbStbbdev static color_t render_one_pixel(int x,
80d86ed7fbStbbdev                                 int y,
81d86ed7fbStbbdev                                 unsigned int *local_mbox,
82d86ed7fbStbbdev                                 unsigned int &serial,
83d86ed7fbStbbdev                                 int startx,
84d86ed7fbStbbdev                                 int stopx,
85d86ed7fbStbbdev                                 int starty,
86d86ed7fbStbbdev                                 int stopy) {
87d86ed7fbStbbdev     /* private vars moved inside loop */
88d86ed7fbStbbdev     ray primary, sample;
89d86ed7fbStbbdev     color col, avcol;
90d86ed7fbStbbdev     int R, G, B;
91d86ed7fbStbbdev     intersectstruct local_intersections;
92d86ed7fbStbbdev     int alias;
93d86ed7fbStbbdev     /* end private */
94d86ed7fbStbbdev 
95d86ed7fbStbbdev     primary = camray(&scene, x, y);
96d86ed7fbStbbdev     primary.intstruct = &local_intersections;
97d86ed7fbStbbdev     primary.flags = RT_RAY_REGULAR;
98d86ed7fbStbbdev 
99d86ed7fbStbbdev     serial++;
100d86ed7fbStbbdev     primary.serial = serial;
101d86ed7fbStbbdev     primary.mbox = local_mbox;
102d86ed7fbStbbdev     primary.maxdist = FHUGE;
103d86ed7fbStbbdev     primary.scene = &scene;
104d86ed7fbStbbdev     col = trace(&primary);
105d86ed7fbStbbdev 
106d86ed7fbStbbdev     serial = primary.serial;
107d86ed7fbStbbdev 
108d86ed7fbStbbdev     /* perform antialiasing if enabled.. */
109d86ed7fbStbbdev     if (scene.antialiasing > 0) {
110d86ed7fbStbbdev         for (alias = 0; alias < scene.antialiasing; alias++) {
111d86ed7fbStbbdev             serial++; /* increment serial number */
112d86ed7fbStbbdev             sample = primary; /* copy the regular primary ray to start with */
113d86ed7fbStbbdev             sample.serial = serial;
114d86ed7fbStbbdev 
115d86ed7fbStbbdev #pragma omp critical
116d86ed7fbStbbdev             {
117d86ed7fbStbbdev                 sample.d.x += ((rand() % 100) - 50) / jitterscale;
118d86ed7fbStbbdev                 sample.d.y += ((rand() % 100) - 50) / jitterscale;
119d86ed7fbStbbdev                 sample.d.z += ((rand() % 100) - 50) / jitterscale;
120d86ed7fbStbbdev             }
121d86ed7fbStbbdev 
122d86ed7fbStbbdev             avcol = trace(&sample);
123d86ed7fbStbbdev 
124d86ed7fbStbbdev             serial = sample.serial; /* update our overall serial # */
125d86ed7fbStbbdev 
126d86ed7fbStbbdev             col.r += avcol.r;
127d86ed7fbStbbdev             col.g += avcol.g;
128d86ed7fbStbbdev             col.b += avcol.b;
129d86ed7fbStbbdev         }
130d86ed7fbStbbdev 
131d86ed7fbStbbdev         col.r /= (scene.antialiasing + 1.0);
132d86ed7fbStbbdev         col.g /= (scene.antialiasing + 1.0);
133d86ed7fbStbbdev         col.b /= (scene.antialiasing + 1.0);
134d86ed7fbStbbdev     }
135d86ed7fbStbbdev 
136d86ed7fbStbbdev     /* Handle overexposure and underexposure here... */
137d86ed7fbStbbdev     R = (int)(col.r * 255);
138d86ed7fbStbbdev     if (R > 255)
139d86ed7fbStbbdev         R = 255;
140d86ed7fbStbbdev     else if (R < 0)
141d86ed7fbStbbdev         R = 0;
142d86ed7fbStbbdev 
143d86ed7fbStbbdev     G = (int)(col.g * 255);
144d86ed7fbStbbdev     if (G > 255)
145d86ed7fbStbbdev         G = 255;
146d86ed7fbStbbdev     else if (G < 0)
147d86ed7fbStbbdev         G = 0;
148d86ed7fbStbbdev 
149d86ed7fbStbbdev     B = (int)(col.b * 255);
150d86ed7fbStbbdev     if (B > 255)
151d86ed7fbStbbdev         B = 255;
152d86ed7fbStbbdev     else if (B < 0)
153d86ed7fbStbbdev         B = 0;
154d86ed7fbStbbdev 
155d86ed7fbStbbdev     return video->get_color(R, G, B);
156d86ed7fbStbbdev }
157d86ed7fbStbbdev 
parallel_thread(patch * pchin,int depth)158d86ed7fbStbbdev static void parallel_thread(patch *pchin, int depth) {
159d86ed7fbStbbdev     unsigned char col[3];
160d86ed7fbStbbdev     col[0] = col[1] = col[2] = (32 * depth) % 256;
161d86ed7fbStbbdev     depth++;
162d86ed7fbStbbdev #pragma intel omp taskq firstprivate(depth)
163d86ed7fbStbbdev     {
164d86ed7fbStbbdev         int startx, stopx, starty, stopy;
165d86ed7fbStbbdev         int xs, ys;
166d86ed7fbStbbdev 
167d86ed7fbStbbdev         startx = pchin->startx;
168d86ed7fbStbbdev         stopx = pchin->stopx;
169d86ed7fbStbbdev         starty = pchin->starty;
170d86ed7fbStbbdev         stopy = pchin->stopy;
171d86ed7fbStbbdev 
172d86ed7fbStbbdev         if (((stopx - startx) >= grain_size) || ((stopy - starty) >= grain_size)) {
173d86ed7fbStbbdev             int xpatchsize = (stopx - startx) / DIVFACTOR + 1;
174d86ed7fbStbbdev             int ypatchsize = (stopy - starty) / DIVFACTOR + 1;
175d86ed7fbStbbdev             for (ys = starty; ys <= stopy; ys += ypatchsize)
176d86ed7fbStbbdev                 for (xs = startx; xs <= stopx; xs += xpatchsize) {
177d86ed7fbStbbdev                     patch pch;
178d86ed7fbStbbdev                     pch.startx = xs;
179d86ed7fbStbbdev                     pch.starty = ys;
180d86ed7fbStbbdev                     pch.stopx = MIN(xs + xpatchsize, stopx);
181d86ed7fbStbbdev                     pch.stopy = MIN(ys + ypatchsize, stopy);
182d86ed7fbStbbdev 
183d86ed7fbStbbdev #pragma intel omp task
184d86ed7fbStbbdev                     parallel_thread(&pch, depth);
185d86ed7fbStbbdev                 }
186d86ed7fbStbbdev         }
187d86ed7fbStbbdev         else {
188d86ed7fbStbbdev             /* just trace this patch */
189d86ed7fbStbbdev             unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
190d86ed7fbStbbdev             unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
191d86ed7fbStbbdev             memset(local_mbox, 0, mboxsize);
192d86ed7fbStbbdev 
193d86ed7fbStbbdev             drawing_area drawing(startx, totaly - stopy, stopx - startx, stopy - starty);
194d86ed7fbStbbdev             for (int i = 1, y = starty; y < stopy; ++y, i++) {
195d86ed7fbStbbdev                 if (!video->running)
196d86ed7fbStbbdev                     continue;
197d86ed7fbStbbdev                 drawing.set_pos(0, drawing.size_y - i);
198d86ed7fbStbbdev                 unsigned int serial = 5 * ((stopx - startx) + (stopy - starty) * totalx);
199d86ed7fbStbbdev                 for (int x = startx; x < stopx; x++) {
200d86ed7fbStbbdev                     color_t c =
201d86ed7fbStbbdev                         render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
202d86ed7fbStbbdev                     drawing.put_pixel(c);
203d86ed7fbStbbdev                 }
204d86ed7fbStbbdev             }
205d86ed7fbStbbdev             video->next_frame();
206d86ed7fbStbbdev         }
207d86ed7fbStbbdev     }
208d86ed7fbStbbdev }
209d86ed7fbStbbdev 
thread_trace(thr_parms * parms)210d86ed7fbStbbdev void *thread_trace(thr_parms *parms) {
211d86ed7fbStbbdev     // shared but read-only so could be private too
212d86ed7fbStbbdev     all_parms = parms;
213d86ed7fbStbbdev     scene = parms->scene;
214d86ed7fbStbbdev     startx = parms->startx;
215d86ed7fbStbbdev     stopx = parms->stopx;
216d86ed7fbStbbdev     starty = parms->starty;
217d86ed7fbStbbdev     stopy = parms->stopy;
218d86ed7fbStbbdev     jitterscale = 40.0 * (scene.hres + scene.vres);
219d86ed7fbStbbdev     totalx = parms->stopx - parms->startx + 1;
220d86ed7fbStbbdev     totaly = parms->scene.vres;
221d86ed7fbStbbdev 
222d86ed7fbStbbdev     patch pch;
223d86ed7fbStbbdev     pch.startx = startx;
224d86ed7fbStbbdev     pch.stopx = stopx;
225d86ed7fbStbbdev     pch.starty = starty;
226d86ed7fbStbbdev     pch.stopy = stopy;
227d86ed7fbStbbdev     int g;
228d86ed7fbStbbdev     char *grain_str = getenv("TASKQ_GRAINSIZE");
229d86ed7fbStbbdev     if (grain_str && (sscanf(grain_str, "%d", &g) > 0) && (g > 0))
230d86ed7fbStbbdev         grain_size = g;
231d86ed7fbStbbdev #pragma omp parallel
232d86ed7fbStbbdev     parallel_thread(&pch, 0);
233d86ed7fbStbbdev 
234d86ed7fbStbbdev     return (nullptr);
235d86ed7fbStbbdev }
236