//===----- Workshare.cpp - OpenMP workshare implementation ------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the KMPC interface
// for the loop construct plus other worksharing constructs that use the same
// interface as loops.
//
//===----------------------------------------------------------------------===//

#include "Debug.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
#include "Types.h"
#include "Utils.h"

using namespace _OMP;

// TODO:
struct DynamicScheduleTracker {
  int64_t Chunk;
  int64_t LoopUpperBound;
  int64_t NextLowerBound;
  int64_t Stride;
  kmp_sched_t ScheduleType;
  DynamicScheduleTracker *NextDST;
};

#define ASSERT0(...)

// used by the library for the interface with the app
#define DISPATCH_FINISHED 0
#define DISPATCH_NOTFINISHED 1

// used by dynamic scheduling
#define FINISHED 0
#define NOT_FINISHED 1
#define LAST_CHUNK 2

#pragma omp declare target

// TODO: This variable is a hack inherited from the old runtime.
uint64_t SHARED(Cnt);

template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
////////////////////////////////////////////////////////////////////////////////
// Loop with static scheduling with chunk

  // Generic implementation of OMP loop scheduling with static policy
  /*! \brief Calculate initial bounds for static loop and stride
   *  @param[in] loc location in code of the call (not used here)
   *  @param[in] global_tid global thread id
   *  @param[in] schedtype type of scheduling (see omptarget-nvptx.h)
   *  @param[in] plastiter pointer to last iteration
   *  @param[in,out] pointer to loop lower bound; it will contain the value of
   *  the lower bound of the first chunk
   *  @param[in,out] pointer to loop upper bound; it will contain the value of
   *  the upper bound of the first chunk
   *  @param[in,out] pointer to loop stride; it will contain the value of the
   *  stride between two successive chunks executed by the same thread
   *  @param[in] loop increment bump
   *  @param[in] chunk size
   */

  // helper function for static chunk
  static void ForStaticChunk(int &last, T &lb, T &ub, ST &stride, ST chunk,
                             T entityId, T numberOfEntities) {
    // Each thread executes multiple chunks, all of the same size except
    // possibly the last one.
    // Distance between two successive chunks:
    stride = numberOfEntities * chunk;
    lb = lb + entityId * chunk;
    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    // Say ub' is the beginning of the last chunk. Then whoever has a
    // lower bound plus a multiple of the increment equal to ub' is
    // the last one.
    T beginingLastChunk = inputUb - (inputUb % chunk);
    last = ((beginingLastChunk - lb) % stride) == 0;
  }
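
  // Illustrative worked example (not part of the runtime logic): for a loop
  // with lb = 0, ub = 99, chunk = 10 and numberOfEntities = 4 threads,
  // thread 1 (entityId = 1) obtains
  //   stride = 4 * 10 = 40, lb = 10, ub = 19,
  // i.e. it executes the chunks [10,19], [50,59] and [90,99]. The last chunk
  // begins at 99 - (99 % 10) = 90 and (90 - 10) % 40 == 0, so thread 1 is
  // also the one flagged as executing the last iteration.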

////////////////////////////////////////////////////////////////////////////////
// Loop with static scheduling without chunk

  // helper function for static no chunk
  static void ForStaticNoChunk(int &last, T &lb, T &ub, ST &stride, ST &chunk,
                               T entityId, T numberOfEntities) {
    // No chunk size specified. Each thread or warp gets at most one chunk;
    // all chunks are nearly equal in size.
    T loopSize = ub - lb + 1;

    chunk = loopSize / numberOfEntities;
    T leftOver = loopSize - chunk * numberOfEntities;

    if (entityId < leftOver) {
      chunk++;
      lb = lb + entityId * chunk;
    } else {
      lb = lb + entityId * chunk + leftOver;
    }

    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    last = lb <= inputUb && inputUb <= ub;
    stride = loopSize; // make sure we only do 1 chunk per warp
  }
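
  // Illustrative worked example (not part of the runtime logic): for a loop
  // with lb = 0, ub = 99 (100 iterations) and numberOfEntities = 8 threads,
  // chunk = 100 / 8 = 12 with leftOver = 4. Threads 0-3 each take 13
  // iterations starting at 0, 13, 26 and 39; threads 4-7 each take 12
  // iterations starting at 52, 64, 76 and 88. Thread 7 covers [88,99] and is
  // flagged as executing the last iteration.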

////////////////////////////////////////////////////////////////////////////////
// Support for Static Init

  static void for_static_init(int32_t, int32_t schedtype, int32_t *plastiter,
                              T *plower, T *pupper, ST *pstride, ST chunk,
                              bool IsSPMDExecutionMode) {
    int32_t gtid = omp_get_thread_num();
    int numberOfActiveOMPThreads = omp_get_num_threads();

    // All warps in excess of the maximum requested do not execute the loop.
    ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
            "current thread is not needed here; error");

    // copy
    int lastiter = 0;
    T lb = *plower;
    T ub = *pupper;
    ST stride = *pstride;

    // init
    switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) {
    case kmp_sched_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        break;
      }
    } // note: if chunk <= 0, use nochunk
    case kmp_sched_static_balanced_chunk: {
      if (chunk > 0) {
        // round up to make sure the chunk is enough to cover all iterations
        T tripCount = ub - lb + 1; // +1 because ub is inclusive
        T span = (tripCount + numberOfActiveOMPThreads - 1) /
                 numberOfActiveOMPThreads;
        // perform chunk adjustment
        chunk = (span + chunk - 1) & ~(chunk - 1);

        ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
        T oldUb = ub;
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        if (ub > oldUb)
          ub = oldUb;
        break;
      }
    } // note: if chunk <= 0, use nochunk
    case kmp_sched_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
      break;
    }
    case kmp_sched_distr_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
        break;
      } // note: if chunk <= 0, use nochunk
    }
    case kmp_sched_distr_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
      break;
    }
    case kmp_sched_distr_static_chunk_sched_static_chunkone: {
      ForStaticChunk(lastiter, lb, ub, stride, chunk,
                     numberOfActiveOMPThreads * omp_get_team_num() + gtid,
                     omp_get_num_teams() * numberOfActiveOMPThreads);
      break;
    }
    default: {
      // ASSERT(LT_FUSSY, 0, "unknown schedtype %d", (int)schedtype);
      ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                     numberOfActiveOMPThreads);
      break;
    }
    }
    // copy back
    *plastiter = lastiter;
    *plower = lb;
    *pupper = ub;
    *pstride = stride;
  }
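
  // A rough sketch of how compiler-generated code is expected to consume the
  // bounds computed above (identifiers and the schedule constant are
  // illustrative; the chunked variants additionally step an outer loop by the
  // returned stride):
  //
  //   int32_t last = 0, lb = 0, ub = tripCount - 1, stride = 1;
  //   __kmpc_for_static_init_4(loc, tid, kmp_sched_static_nochunk, &last,
  //                            &lb, &ub, &stride, /*incr=*/1, /*chunk=*/1);
  //   for (int32_t i = lb; i <= ub; ++i) // note: ub is inclusive
  //     body(i);
  //   __kmpc_for_static_fini(loc, tid);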

////////////////////////////////////////////////////////////////////////////////
// Support for dispatch Init

  static int OrderedSchedule(kmp_sched_t schedule) {
    return schedule >= kmp_sched_ordered_first &&
           schedule <= kmp_sched_ordered_last;
  }

  static void dispatch_init(IdentTy *loc, int32_t threadId,
                            kmp_sched_t schedule, T lb, T ub, ST st, ST chunk,
                            DynamicScheduleTracker *DST) {
    int tid = mapping::getThreadIdInBlock();
    T tnum = omp_get_num_threads();
    T tripCount = ub - lb + 1; // +1 because ub is inclusive
    ASSERT0(LT_FUSSY, threadId < tnum,
            "current thread is not needed here; error");

    /* Currently just ignore the monotonic and non-monotonic modifiers
     * (the compiler isn't producing them yet anyway). When it does, we will
     * want to look at them somewhere here and use that information to add to
     * our schedule choice. We shouldn't need to pass them on; they merely
     * affect which schedule we can legally choose for various dynamic cases
     * (in particular, whether or not a stealing scheme is legal).
     */
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

    // Process schedule.
    if (tnum == 1 || tripCount <= 1 || OrderedSchedule(schedule)) {
      if (OrderedSchedule(schedule))
        __kmpc_barrier(loc, threadId);
      schedule = kmp_sched_static_chunk;
      chunk = tripCount; // one thread gets the whole loop
    } else if (schedule == kmp_sched_runtime) {
      // process runtime
      omp_sched_t rtSched;
      int ChunkInt;
      omp_get_schedule(&rtSched, &ChunkInt);
      chunk = ChunkInt;
      switch (rtSched) {
      case omp_sched_static: {
        if (chunk > 0)
          schedule = kmp_sched_static_chunk;
        else
          schedule = kmp_sched_static_nochunk;
        break;
      }
      case omp_sched_auto: {
        schedule = kmp_sched_static_chunk;
        chunk = 1;
        break;
      }
      case omp_sched_dynamic:
      case omp_sched_guided: {
        schedule = kmp_sched_dynamic;
        break;
      }
      }
    } else if (schedule == kmp_sched_auto) {
      schedule = kmp_sched_static_chunk;
      chunk = 1;
    } else {
      // ASSERT(LT_FUSSY,
      //        schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
      //        "unknown schedule %d & chunk %lld\n", (int)schedule,
      //        (long long)chunk);
    }

    // init schedules
    if (schedule == kmp_sched_static_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_balanced_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      // round up to make sure the chunk is enough to cover all iterations
      T span = (tripCount + tnum - 1) / tnum;
      // perform chunk adjustment
      chunk = (span + chunk - 1) & ~(chunk - 1);

      T oldUb = ub;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
      if (ub > oldUb)
        ub = oldUb;
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_nochunk) {
      ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
      // save data
      DST->ScheduleType = schedule;
      if (chunk < 1)
        chunk = 1;
      DST->Chunk = chunk;
      DST->LoopUpperBound = ub;
      DST->NextLowerBound = lb;
      __kmpc_barrier(loc, threadId);
      if (tid == 0) {
        Cnt = 0;
        fence::team(__ATOMIC_SEQ_CST);
      }
      __kmpc_barrier(loc, threadId);
    }
  }

////////////////////////////////////////////////////////////////////////////////
// Support for dispatch next

  static uint64_t NextIter() {
    __kmpc_impl_lanemask_t active = mapping::activemask();
    uint32_t leader = utils::ffs(active) - 1;
    uint32_t change = utils::popc(active);
    __kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
    unsigned int rank = utils::popc(active & lane_mask_lt);
    uint64_t warp_res;
    if (rank == 0) {
      warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
    }
    warp_res = utils::shuffle(active, warp_res, leader);
    return warp_res + rank;
  }
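
  // Illustrative note on the warp-aggregated increment above (not part of the
  // runtime logic): all lanes of a warp that reach NextIter together claim
  // consecutive iteration numbers with a single atomic. The lowest active
  // lane adds popc(active) to Cnt, the pre-increment value is broadcast to
  // the warp via shuffle, and each lane adds its rank among the active lanes.
  // For example, if lanes {0, 1, 2, 5} are active and Cnt is currently 12,
  // the leader's atomic returns 12 (and Cnt becomes 16), so the four lanes
  // obtain the iteration numbers 12, 13, 14 and 15, respectively.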

  static int DynamicNextChunk(T &lb, T &ub, T chunkSize, T loopLowerBound,
                              T loopUpperBound) {
    T N = NextIter();
    lb = loopLowerBound + N * chunkSize;
    ub = lb + chunkSize - 1; // Clang uses i <= ub

    // 3 result cases:
    //  a. lb and ub < loopUpperBound --> NOT_FINISHED
    //  b. lb <= loopUpperBound and ub >= loopUpperBound: last chunk -->
    //     LAST_CHUNK
    //  c. lb and ub > loopUpperBound: empty chunk --> FINISHED
    // a.
    if (lb <= loopUpperBound && ub < loopUpperBound) {
      return NOT_FINISHED;
    }
    // b.
    if (lb <= loopUpperBound) {
      ub = loopUpperBound;
      return LAST_CHUNK;
    }
    // c. if we are here, we are in case 'c'
    lb = loopUpperBound + 2;
    ub = loopUpperBound + 1;
    return FINISHED;
  }
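
  // Illustrative worked example (not part of the runtime logic): with
  // loopLowerBound = 0, loopUpperBound = 99 and chunkSize = 10, a thread
  // that draws N = 3 gets [30,39] and NOT_FINISHED; a thread that draws
  // N = 9 gets [90,99], which is the last chunk, and LAST_CHUNK; any thread
  // that draws N >= 10 gets an empty range and FINISHED.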

  static int dispatch_next(IdentTy *loc, int32_t gtid, int32_t *plast,
                           T *plower, T *pupper, ST *pstride,
                           DynamicScheduleTracker *DST) {
    // ID of a thread in its own warp

    // automatically selects thread or warp ID based on selected implementation
    ASSERT0(LT_FUSSY, gtid < omp_get_num_threads(),
            "current thread is not needed here; error");
    // retrieve schedule
    kmp_sched_t schedule = DST->ScheduleType;

    // xxx reduce to one
    if (schedule == kmp_sched_static_chunk ||
        schedule == kmp_sched_static_nochunk) {
      T myLb = DST->NextLowerBound;
      T ub = DST->LoopUpperBound;
      // finished?
      if (myLb > ub) {
        return DISPATCH_FINISHED;
      }
      // not finished, save current bounds
      ST chunk = DST->Chunk;
      *plower = myLb;
      T myUb = myLb + chunk - 1; // Clang uses i <= ub
      if (myUb > ub)
        myUb = ub;
      *pupper = myUb;
      *plast = (int32_t)(myUb == ub);

      // increment next lower bound by the stride
      ST stride = DST->Stride;
      DST->NextLowerBound = myLb + stride;
      return DISPATCH_NOTFINISHED;
    }
    ASSERT0(LT_FUSSY,
            schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
            "bad sched");
    T myLb, myUb;
    int finished = DynamicNextChunk(myLb, myUb, DST->Chunk, DST->NextLowerBound,
                                    DST->LoopUpperBound);

    if (finished == FINISHED)
      return DISPATCH_FINISHED;

    // not finished (either not finished or last chunk)
    *plast = (int32_t)(finished == LAST_CHUNK);
    *plower = myLb;
    *pupper = myUb;
    *pstride = 1;

    return DISPATCH_NOTFINISHED;
  }

  static void dispatch_fini() {
    // nothing
  }

////////////////////////////////////////////////////////////////////////////////
// end of template class that encapsulates all the helper functions
////////////////////////////////////////////////////////////////////////////////
};

////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (dyn loops)
////////////////////////////////////////////////////////////////////////////////

// TODO: This is a stopgap. We probably want to expand the dispatch API to take
// a DST pointer which can then be allocated properly without malloc.
DynamicScheduleTracker *THREAD_LOCAL(ThreadDSTPtr);

// Create a new DST, link the current one, and define the new as current.
static DynamicScheduleTracker *pushDST() {
  DynamicScheduleTracker *NewDST = static_cast<DynamicScheduleTracker *>(
      memory::allocGlobal(sizeof(DynamicScheduleTracker), "new DST"));
  *NewDST = DynamicScheduleTracker({0});
  NewDST->NextDST = ThreadDSTPtr;
  ThreadDSTPtr = NewDST;
  return ThreadDSTPtr;
}

// Return the current DST.
static DynamicScheduleTracker *peekDST() { return ThreadDSTPtr; }

// Pop the current DST and restore the last one.
static void popDST() {
  DynamicScheduleTracker *OldDST = ThreadDSTPtr->NextDST;
  memory::freeGlobal(ThreadDSTPtr, "remove DST");
  ThreadDSTPtr = OldDST;
}

extern "C" {

// init
void __kmpc_dispatch_init_4(IdentTy *loc, int32_t tid, int32_t schedule,
                            int32_t lb, int32_t ub, int32_t st, int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_4u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint32_t lb, uint32_t ub, int32_t st,
                             int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8(IdentTy *loc, int32_t tid, int32_t schedule,
                            int64_t lb, int64_t ub, int64_t st, int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint64_t lb, uint64_t ub, int64_t st,
                             int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

// next
int __kmpc_dispatch_next_4(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_4u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint32_t *p_lb, uint32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint64_t *p_lb, uint64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

// fini
void __kmpc_dispatch_fini_4(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_4u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
  popDST();
}
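
// A rough sketch of how compiler-generated code is expected to drive the
// dispatch entry points above for a dynamically scheduled loop (identifiers
// and the schedule constant are illustrative; whether and where a fini call
// is emitted depends on the directive, e.g. on the ordered clause):
//
//   __kmpc_dispatch_init_4(loc, tid, kmp_sched_dynamic, /*lb=*/0,
//                          /*ub=*/tripCount - 1, /*st=*/1, /*chunk=*/C);
//   int32_t last, lb, ub, st;
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lb, &ub, &st)) {
//     for (int32_t i = lb; i <= ub; ++i) // ub is inclusive
//       body(i);
//   }
//   __kmpc_dispatch_fini_4(loc, tid);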

////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (static loops)
////////////////////////////////////////////////////////////////////////////////

void __kmpc_for_static_init_4(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int32_t *plower, int32_t *pupper,
                              int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_4u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint32_t *plower, uint32_t *pupper,
                               int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int64_t *plower, int64_t *pupper,
                              int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint64_t *plower, uint64_t *pupper,
                               int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int32_t *plower, int32_t *pupper,
                                     int32_t *pstride, int32_t incr,
                                     int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint32_t *plower, uint32_t *pupper,
                                      int32_t *pstride, int32_t incr,
                                      int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int64_t *plower, int64_t *pupper,
                                     int64_t *pstride, int64_t incr,
                                     int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint64_t *plower, uint64_t *pupper,
                                      int64_t *pstride, int64_t incr,
                                      int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}

void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}
}

#pragma omp end declare target