//===----- Workshare.cpp - OpenMP workshare implementation ------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the KMPC interface
// for the loop construct plus other worksharing constructs that use the same
// interface as loops.
//
//===----------------------------------------------------------------------===//

#include "Debug.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
#include "Types.h"
#include "Utils.h"

using namespace _OMP;

// TODO:
struct DynamicScheduleTracker {
  int64_t Chunk;
  int64_t LoopUpperBound;
  int64_t NextLowerBound;
  int64_t Stride;
  kmp_sched_t ScheduleType;
  DynamicScheduleTracker *NextDST;
};

#define ASSERT0(...)

// used by the library for the interface with the app
#define DISPATCH_FINISHED 0
#define DISPATCH_NOTFINISHED 1

// used by dynamic scheduling
#define FINISHED 0
#define NOT_FINISHED 1
#define LAST_CHUNK 2

#pragma omp begin declare target device_type(nohost)

// TODO: This variable is a hack inherited from the old runtime.
static uint64_t SHARED(Cnt);

template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling with chunk

  // Generic implementation of OMP loop scheduling with static policy
  /*! \brief Calculate initial bounds for static loop and stride
   * @param[in] loc location in code of the call (not used here)
   * @param[in] global_tid global thread id
   * @param[in] schedtype type of scheduling (see omptarget-nvptx.h)
   * @param[in] plastiter pointer to last iteration
   * @param[in,out] plower pointer to loop lower bound; it will contain the
   * value of the lower bound of the first chunk
   * @param[in,out] pupper pointer to loop upper bound; it will contain the
   * value of the upper bound of the first chunk
   * @param[in,out] pstride pointer to loop stride; it will contain the value
   * of the stride between two successive chunks executed by the same thread
   * @param[in] incr loop increment bump
   * @param[in] chunk chunk size
   */

  // helper function for static chunk
  static void ForStaticChunk(int &last, T &lb, T &ub, ST &stride, ST chunk,
                             T entityId, T numberOfEntities) {
    // each thread executes multiple chunks all of the same size, except
    // the last one
    // distance between two successive chunks
    stride = numberOfEntities * chunk;
    lb = lb + entityId * chunk;
    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    // Say ub' is the beginning of the last chunk. Then whoever has a
    // lower bound plus a multiple of the stride equal to ub' is
    // the last one.
    T beginingLastChunk = inputUb - (inputUb % chunk);
    last = ((beginingLastChunk - lb) % stride) == 0;
  }

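  // For illustration: with lb = 0, ub = 99, chunk = 10 and 4 threads,
  // ForStaticChunk yields stride = 40 and gives thread 1 the first chunk
  // [10, 19]; stepping by the stride, thread 1 also executes [50, 59] and the
  // final chunk [90, 99], so `last` is set only for thread 1.
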
  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling without chunk

  // helper function for static no chunk
  static void ForStaticNoChunk(int &last, T &lb, T &ub, ST &stride, ST &chunk,
                               T entityId, T numberOfEntities) {
    // No chunk size specified. Each thread or warp gets at most one
    // chunk; chunks are all of nearly equal size
    T loopSize = ub - lb + 1;

    chunk = loopSize / numberOfEntities;
    T leftOver = loopSize - chunk * numberOfEntities;

    if (entityId < leftOver) {
      chunk++;
      lb = lb + entityId * chunk;
    } else {
      lb = lb + entityId * chunk + leftOver;
    }

    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    last = lb <= inputUb && inputUb <= ub;
    stride = loopSize; // make sure we only do 1 chunk per warp
  }

  ////////////////////////////////////////////////////////////////////////////////
  // Support for Static Init

  static void for_static_init(int32_t, int32_t schedtype, int32_t *plastiter,
                              T *plower, T *pupper, ST *pstride, ST chunk,
                              bool IsSPMDExecutionMode) {
    int32_t gtid = omp_get_thread_num();
    int numberOfActiveOMPThreads = omp_get_num_threads();

    // All warps that are in excess of the maximum requested do not
    // execute the loop.
    ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
            "current thread is not needed here; error");

    // copy
    int lastiter = 0;
    T lb = *plower;
    T ub = *pupper;
    ST stride = *pstride;

    // init
    switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) {
    case kmp_sched_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        break;
      }
      [[fallthrough]];
    } // note: if chunk <= 0, use nochunk
    case kmp_sched_static_balanced_chunk: {
      if (chunk > 0) {
        // round up to make sure the chunk is enough to cover all iterations
        T tripCount = ub - lb + 1; // +1 because ub is inclusive
        T span = (tripCount + numberOfActiveOMPThreads - 1) /
                 numberOfActiveOMPThreads;
        // perform chunk adjustment
        chunk = (span + chunk - 1) & ~(chunk - 1);

        ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
        T oldUb = ub;
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        if (ub > oldUb)
          ub = oldUb;
        break;
      }
      [[fallthrough]];
    } // note: if chunk <= 0, use nochunk
    case kmp_sched_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
      break;
    }
    case kmp_sched_distr_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
        break;
      }
      [[fallthrough]];
    } // note: if chunk <= 0, use nochunk
    case kmp_sched_distr_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
      break;
    }
    case kmp_sched_distr_static_chunk_sched_static_chunkone: {
      ForStaticChunk(lastiter, lb, ub, stride, chunk,
                     numberOfActiveOMPThreads * omp_get_team_num() + gtid,
                     omp_get_num_teams() * numberOfActiveOMPThreads);
      break;
    }
    default: {
      // ASSERT(LT_FUSSY, 0, "unknown schedtype %d", (int)schedtype);
      ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                     numberOfActiveOMPThreads);
      break;
    }
    }
    // copy back
    *plastiter = lastiter;
    *plower = lb;
    *pupper = ub;
    *pstride = stride;
  }

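  // Illustrative sketch (an assumed lowering, not necessarily the exact code
  // Clang emits) of how a loop over i = 0..99 with schedule(static, 10)
  // consumes the values produced by for_static_init; `loc`, `tid` and `body`
  // are placeholders for the location, thread id and loop body:
  //
  //   int32_t last = 0, lb = 0, ub = 99, stride = 1;
  //   __kmpc_for_static_init_4(loc, tid, kmp_sched_static_chunk, &last, &lb,
  //                            &ub, &stride, /*incr=*/1, /*chunk=*/10);
  //   for (int32_t chunkLb = lb; chunkLb <= 99; chunkLb += stride)
  //     for (int32_t i = chunkLb; i <= chunkLb + 9 && i <= 99; ++i)
  //       body(i);
  //   __kmpc_for_static_fini(loc, tid);
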
  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch Init

  static int OrderedSchedule(kmp_sched_t schedule) {
    return schedule >= kmp_sched_ordered_first &&
           schedule <= kmp_sched_ordered_last;
  }

  static void dispatch_init(IdentTy *loc, int32_t threadId,
                            kmp_sched_t schedule, T lb, T ub, ST st, ST chunk,
                            DynamicScheduleTracker *DST) {
    int tid = mapping::getThreadIdInBlock();
    T tnum = omp_get_num_threads();
    T tripCount = ub - lb + 1; // +1 because ub is inclusive
    ASSERT0(LT_FUSSY, threadId < tnum,
            "current thread is not needed here; error");

    /* Currently just ignore the monotonic and non-monotonic modifiers
     * (the compiler isn't producing them yet anyway).
     * When it is we'll want to look at them somewhere here and use that
     * information to add to our schedule choice. We shouldn't need to pass
     * them on, they merely affect which schedule we can legally choose for
     * various dynamic cases. (In particular, whether or not a stealing scheme
     * is legal).
     */
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

    // Process schedule.
    if (tnum == 1 || tripCount <= 1 || OrderedSchedule(schedule)) {
      if (OrderedSchedule(schedule))
        __kmpc_barrier(loc, threadId);
      schedule = kmp_sched_static_chunk;
      chunk = tripCount; // one thread gets the whole loop
    } else if (schedule == kmp_sched_runtime) {
      // process runtime
      omp_sched_t rtSched;
      int ChunkInt;
      omp_get_schedule(&rtSched, &ChunkInt);
      chunk = ChunkInt;
      switch (rtSched) {
      case omp_sched_static: {
        if (chunk > 0)
          schedule = kmp_sched_static_chunk;
        else
          schedule = kmp_sched_static_nochunk;
        break;
      }
      case omp_sched_auto: {
        schedule = kmp_sched_static_chunk;
        chunk = 1;
        break;
      }
      case omp_sched_dynamic:
      case omp_sched_guided: {
        schedule = kmp_sched_dynamic;
        break;
      }
      }
    } else if (schedule == kmp_sched_auto) {
      schedule = kmp_sched_static_chunk;
      chunk = 1;
    } else {
      // ASSERT(LT_FUSSY,
      //        schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
      //        "unknown schedule %d & chunk %lld\n", (int)schedule,
      //        (long long)chunk);
    }

    // init schedules
    if (schedule == kmp_sched_static_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_balanced_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      // round up to make sure the chunk is enough to cover all iterations
      T span = (tripCount + tnum - 1) / tnum;
      // perform chunk adjustment
      chunk = (span + chunk - 1) & ~(chunk - 1);

      T oldUb = ub;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
      if (ub > oldUb)
        ub = oldUb;
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_nochunk) {
      ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
      // save data
      DST->ScheduleType = schedule;
      if (chunk < 1)
        chunk = 1;
      DST->Chunk = chunk;
      DST->LoopUpperBound = ub;
      DST->NextLowerBound = lb;
      __kmpc_barrier(loc, threadId);
      if (tid == 0) {
        Cnt = 0;
        fence::team(__ATOMIC_SEQ_CST);
      }
      __kmpc_barrier(loc, threadId);
    }
  }

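  // Note on the tail of dispatch_init: the shared counter Cnt is reset by a
  // single thread between two barriers. The first barrier keeps the reset
  // from racing with NextIter() increments of a previous dynamic loop, and
  // the team fence plus the second barrier make the zeroed counter visible
  // to every thread before any of them starts claiming iterations.
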
  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch next

  static uint64_t NextIter() {
    __kmpc_impl_lanemask_t active = mapping::activemask();
    uint32_t leader = utils::ffs(active) - 1;
    uint32_t change = utils::popc(active);
    __kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
    unsigned int rank = utils::popc(active & lane_mask_lt);
    uint64_t warp_res = 0;
    if (rank == 0) {
      warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
    }
    warp_res = utils::shuffle(active, warp_res, leader);
    return warp_res + rank;
  }

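  // NextIter() issues one warp-aggregated atomic per warp: the lowest active
  // lane adds popc(active) to Cnt, the previous counter value is broadcast
  // from that leader via shuffle, and each lane offsets it by its rank among
  // the active lanes. For example, if lanes {0, 3, 9} are active, a single
  // atomic add of 3 is performed and the three lanes obtain base + 0,
  // base + 1 and base + 2.
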
  static int DynamicNextChunk(T &lb, T &ub, T chunkSize, T loopLowerBound,
                              T loopUpperBound) {
    T N = NextIter();
    lb = loopLowerBound + N * chunkSize;
    ub = lb + chunkSize - 1; // Clang uses i <= ub

    // 3 result cases:
    //  a. lb <= loopUpperBound and ub < loopUpperBound --> NOT_FINISHED
    //  b. lb <= loopUpperBound and ub >= loopUpperBound: this is the last
    //     chunk --> LAST_CHUNK (the caller still treats it as not finished)
    //  c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
    // a.
    if (lb <= loopUpperBound && ub < loopUpperBound) {
      return NOT_FINISHED;
    }
    // b.
    if (lb <= loopUpperBound) {
      ub = loopUpperBound;
      return LAST_CHUNK;
    }
    // c. if we are here, we are in case 'c'
    lb = loopUpperBound + 2;
    ub = loopUpperBound + 1;
    return FINISHED;
  }

  static int dispatch_next(IdentTy *loc, int32_t gtid, int32_t *plast,
                           T *plower, T *pupper, ST *pstride,
                           DynamicScheduleTracker *DST) {
    // ID of a thread in its own warp

    // automatically selects thread or warp ID based on selected implementation
    ASSERT0(LT_FUSSY, gtid < omp_get_num_threads(),
            "current thread is not needed here; error");
    // retrieve schedule
    kmp_sched_t schedule = DST->ScheduleType;

    // xxx reduce to one
    if (schedule == kmp_sched_static_chunk ||
        schedule == kmp_sched_static_nochunk) {
      T myLb = DST->NextLowerBound;
      T ub = DST->LoopUpperBound;
      // finished?
      if (myLb > ub) {
        return DISPATCH_FINISHED;
      }
      // not finished, save current bounds
      ST chunk = DST->Chunk;
      *plower = myLb;
      T myUb = myLb + chunk - 1; // Clang uses i <= ub
      if (myUb > ub)
        myUb = ub;
      *pupper = myUb;
      *plast = (int32_t)(myUb == ub);

      // increment next lower bound by the stride
      ST stride = DST->Stride;
      DST->NextLowerBound = myLb + stride;
      return DISPATCH_NOTFINISHED;
    }
    ASSERT0(LT_FUSSY,
            schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
            "bad sched");
    T myLb, myUb;
    int finished = DynamicNextChunk(myLb, myUb, DST->Chunk, DST->NextLowerBound,
                                    DST->LoopUpperBound);

    if (finished == FINISHED)
      return DISPATCH_FINISHED;

    // not finished (either not finished or last chunk)
    *plast = (int32_t)(finished == LAST_CHUNK);
    *plower = myLb;
    *pupper = myUb;
    *pstride = 1;

    return DISPATCH_NOTFINISHED;
  }

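  // Example of the static path in dispatch_next: with 4 threads, chunk 10 and
  // a loop over 0..99, thread 1 starts with NextLowerBound = 10 and
  // Stride = 40, so successive calls return [10, 19], [50, 59] and [90, 99]
  // (the last one with *plast set), and the fourth call reports
  // DISPATCH_FINISHED.
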
  static void dispatch_fini() {
    // nothing
  }

  ////////////////////////////////////////////////////////////////////////////////
  // end of template class that encapsulates all the helper functions
  ////////////////////////////////////////////////////////////////////////////////
};

////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (dyn loops)
////////////////////////////////////////////////////////////////////////////////

// TODO: This is a stopgap. We probably want to expand the dispatch API to take
// a DST pointer which can then be allocated properly without malloc.
static DynamicScheduleTracker *THREAD_LOCAL(ThreadDSTPtr);

// Create a new DST, link in the current one, and make the new one current.
static DynamicScheduleTracker *pushDST() {
  DynamicScheduleTracker *NewDST = static_cast<DynamicScheduleTracker *>(
      memory::allocGlobal(sizeof(DynamicScheduleTracker), "new DST"));
  *NewDST = DynamicScheduleTracker({0});
  NewDST->NextDST = ThreadDSTPtr;
  ThreadDSTPtr = NewDST;
  return ThreadDSTPtr;
}

// Return the current DST.
static DynamicScheduleTracker *peekDST() { return ThreadDSTPtr; }

// Pop the current DST and restore the previous one.
static void popDST() {
  DynamicScheduleTracker *OldDST = ThreadDSTPtr->NextDST;
  memory::freeGlobal(ThreadDSTPtr, "remove DST");
  ThreadDSTPtr = OldDST;
}

extern "C" {

// init
void __kmpc_dispatch_init_4(IdentTy *loc, int32_t tid, int32_t schedule,
                            int32_t lb, int32_t ub, int32_t st, int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_4u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint32_t lb, uint32_t ub, int32_t st,
                             int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8(IdentTy *loc, int32_t tid, int32_t schedule,
                            int64_t lb, int64_t ub, int64_t st, int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint64_t lb, uint64_t ub, int64_t st,
                             int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

// next
int __kmpc_dispatch_next_4(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_4u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint32_t *p_lb, uint32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint64_t *p_lb, uint64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

// fini
void __kmpc_dispatch_fini_4(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_4u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
  popDST();
}

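// Illustrative pairing of the dynamic dispatch entry points above (a sketch
// of the intended call sequence, not necessarily the code a compiler emits
// verbatim) for schedule(dynamic, 4) over i = 0..999; `loc`, `tid` and `body`
// are placeholders:
//
//   int32_t last, lb, ub, st;
//   __kmpc_dispatch_init_4(loc, tid, kmp_sched_dynamic, /*lb=*/0, /*ub=*/999,
//                          /*st=*/1, /*chunk=*/4);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lb, &ub, &st))
//     for (int32_t i = lb; i <= ub; ++i)
//       body(i);
//   __kmpc_dispatch_fini_4(loc, tid); // pops the DST pushed by init
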
////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (static loops)
////////////////////////////////////////////////////////////////////////////////

void __kmpc_for_static_init_4(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int32_t *plower, int32_t *pupper,
                              int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_4u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint32_t *plower, uint32_t *pupper,
                               int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int64_t *plower, int64_t *pupper,
                              int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint64_t *plower, uint64_t *pupper,
                               int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int32_t *plower, int32_t *pupper,
                                     int32_t *pstride, int32_t incr,
                                     int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint32_t *plower, uint32_t *pupper,
                                      int32_t *pstride, int32_t incr,
                                      int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int64_t *plower, int64_t *pupper,
                                     int64_t *pstride, int64_t incr,
                                     int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint64_t *plower, uint64_t *pupper,
                                      int64_t *pstride, int64_t incr,
                                      int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}

void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}
}

#pragma omp end declare target