//===----- Workshare.cpp -  OpenMP workshare implementation ------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the KMPC interface
// for the loop construct plus other worksharing constructs that use the same
// interface as loops.
//
//===----------------------------------------------------------------------===//

#include "Debug.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
#include "Types.h"
#include "Utils.h"

using namespace _OMP;

// TODO:
struct DynamicScheduleTracker {
  int64_t Chunk;
  int64_t LoopUpperBound;
  int64_t NextLowerBound;
  int64_t Stride;
  kmp_sched_t ScheduleType;
  DynamicScheduleTracker *NextDST;
};

#define ASSERT0(...)

// used by the library for the interface with the app
#define DISPATCH_FINISHED 0
#define DISPATCH_NOTFINISHED 1

// used by dynamic scheduling
#define FINISHED 0
#define NOT_FINISHED 1
#define LAST_CHUNK 2

#pragma omp declare target

// TODO: This variable is a hack inherited from the old runtime.
uint64_t SHARED(Cnt);

template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling with chunk

  // Generic implementation of OMP loop scheduling with static policy
  /*! \brief Calculate initial bounds for static loop and stride
   *  @param[in] loc location in code of the call (not used here)
   *  @param[in] global_tid global thread id
   *  @param[in] schedtype type of scheduling (see omptarget-nvptx.h)
   *  @param[in] plastiter pointer to the last-iteration flag
   *  @param[in,out] plower pointer to the loop lower bound; on return it holds
   *  the lower bound of the first chunk
   *  @param[in,out] pupper pointer to the loop upper bound; on return it holds
   *  the upper bound of the first chunk
   *  @param[in,out] pstride pointer to the loop stride; on return it holds the
   *  stride between two successive chunks executed by the same thread
   *  @param[in] incr loop increment bump
   *  @param[in] chunk chunk size
   */

  // helper function for static chunk
  static void ForStaticChunk(int &last, T &lb, T &ub, ST &stride, ST chunk,
                             T entityId, T numberOfEntities) {
    // Each thread executes multiple chunks, all of the same size except
    // possibly the last one.
    // Distance between two successive chunks executed by the same entity.
    stride = numberOfEntities * chunk;
    lb = lb + entityId * chunk;
    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    // Say ub' is the beginning of the last chunk. Then whoever has a
    // lower bound plus a multiple of the stride equal to ub' is
    // the last one.
    T beginningLastChunk = inputUb - (inputUb % chunk);
    last = ((beginningLastChunk - lb) % stride) == 0;
  }
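  // For example, with lb = 0, ub = 99, chunk = 10 and numberOfEntities = 4,
  // the stride is 4 * 10 = 40; entity 0 starts chunks at 0, 40, 80, entity 1
  // at 10, 50, 90, and so on. The last chunk begins at 99 - (99 % 10) = 90,
  // so entity 1 is flagged as `last` because 90 - 10 is a multiple of the
  // stride.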
  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling without chunk

  // helper function for static no chunk
  static void ForStaticNoChunk(int &last, T &lb, T &ub, ST &stride, ST &chunk,
                               T entityId, T numberOfEntities) {
    // No chunk size specified. Each thread or warp gets at most one
    // chunk; chunks are all of nearly equal size.
    T loopSize = ub - lb + 1;

    chunk = loopSize / numberOfEntities;
    T leftOver = loopSize - chunk * numberOfEntities;

    if (entityId < leftOver) {
      chunk++;
      lb = lb + entityId * chunk;
    } else {
      lb = lb + entityId * chunk + leftOver;
    }

    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    last = lb <= inputUb && inputUb <= ub;
    stride = loopSize; // make sure we only do 1 chunk per warp
  }

  ////////////////////////////////////////////////////////////////////////////////
  // Support for Static Init

  static void for_static_init(int32_t gtid, int32_t schedtype,
                              int32_t *plastiter, T *plower, T *pupper,
                              ST *pstride, ST chunk, bool IsSPMDExecutionMode) {
    // When IsRuntimeUninitialized is true, we assume that the caller is
    // in an L0 parallel region and that all worker threads participate.

    // Assume we are in a teams region or that we use a single block
    // per target region.
    int numberOfActiveOMPThreads = omp_get_num_threads();

    // All warps that are in excess of the maximum requested do not
    // execute the loop.
    ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
            "current thread is not needed here; error");

    // copy
    int lastiter = 0;
    T lb = *plower;
    T ub = *pupper;
    ST stride = *pstride;

    // init
    switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) {
    case kmp_sched_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        break;
      }
    } // note: if chunk <= 0, use nochunk
    case kmp_sched_static_balanced_chunk: {
      if (chunk > 0) {
        // round up to make sure the chunk is enough to cover all iterations
        T tripCount = ub - lb + 1; // +1 because ub is inclusive
        T span = (tripCount + numberOfActiveOMPThreads - 1) /
                 numberOfActiveOMPThreads;
        // perform chunk adjustment
        chunk = (span + chunk - 1) & ~(chunk - 1);

        ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
        T oldUb = ub;
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        if (ub > oldUb)
          ub = oldUb;
        break;
      }
    } // note: if chunk <= 0, use nochunk
    case kmp_sched_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
      break;
    }
    case kmp_sched_distr_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
        break;
      } // note: if chunk <= 0, use nochunk
    }
    case kmp_sched_distr_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
      break;
    }
    case kmp_sched_distr_static_chunk_sched_static_chunkone: {
      ForStaticChunk(lastiter, lb, ub, stride, chunk,
                     numberOfActiveOMPThreads * omp_get_team_num() + gtid,
                     omp_get_num_teams() * numberOfActiveOMPThreads);
      break;
    }
    default: {
      // ASSERT(LT_FUSSY, 0, "unknown schedtype %d", (int)schedtype);
      ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                     numberOfActiveOMPThreads);
      break;
    }
    }
    // copy back
    *plastiter = lastiter;
    *plower = lb;
    *pupper = ub;
    *pstride = stride;
  }
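  // For example, a 100-iteration loop [0, 99] split over 8 threads with
  // kmp_sched_static_nochunk gives loopSize = 100, chunk = 12, leftOver = 4:
  // threads 0-3 receive 13 iterations each and threads 4-7 receive 12. Only
  // thread 7's chunk [88, 99] contains the original upper bound, so only it
  // gets the last-iteration flag.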
  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch Init

  static int OrderedSchedule(kmp_sched_t schedule) {
    return schedule >= kmp_sched_ordered_first &&
           schedule <= kmp_sched_ordered_last;
  }

  static void dispatch_init(IdentTy *loc, int32_t threadId,
                            kmp_sched_t schedule, T lb, T ub, ST st, ST chunk,
                            DynamicScheduleTracker *DST) {
    int tid = mapping::getThreadIdInBlock();
    T tnum = omp_get_num_threads();
    T tripCount = ub - lb + 1; // +1 because ub is inclusive
    ASSERT0(LT_FUSSY, threadId < tnum,
            "current thread is not needed here; error");

    /* Currently we just ignore the monotonic and non-monotonic modifiers
     * (the compiler isn't producing them yet anyway). When it does, we will
     * want to look at them here and use that information to refine our
     * schedule choice. We shouldn't need to pass them on; they merely affect
     * which schedule we can legally choose for various dynamic cases (in
     * particular, whether or not a stealing scheme is legal).
     */
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

    // Process schedule.
    if (tnum == 1 || tripCount <= 1 || OrderedSchedule(schedule)) {
      if (OrderedSchedule(schedule))
        __kmpc_barrier(loc, threadId);
      schedule = kmp_sched_static_chunk;
      chunk = tripCount; // one thread gets the whole loop
    } else if (schedule == kmp_sched_runtime) {
      // process runtime
      omp_sched_t rtSched;
      int ChunkInt;
      omp_get_schedule(&rtSched, &ChunkInt);
      chunk = ChunkInt;
      switch (rtSched) {
      case omp_sched_static: {
        if (chunk > 0)
          schedule = kmp_sched_static_chunk;
        else
          schedule = kmp_sched_static_nochunk;
        break;
      }
      case omp_sched_auto: {
        schedule = kmp_sched_static_chunk;
        chunk = 1;
        break;
      }
      case omp_sched_dynamic:
      case omp_sched_guided: {
        schedule = kmp_sched_dynamic;
        break;
      }
      }
    } else if (schedule == kmp_sched_auto) {
      schedule = kmp_sched_static_chunk;
      chunk = 1;
    } else {
      // ASSERT(LT_FUSSY,
      //        schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
      //        "unknown schedule %d & chunk %lld\n", (int)schedule,
      //        (long long)chunk);
    }
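    // As a concrete example of the normalization above, schedule(runtime)
    // with a runtime schedule of "dynamic,4" reaches the code below as
    // kmp_sched_dynamic with chunk 4, while schedule(auto) is mapped to
    // kmp_sched_static_chunk with chunk 1.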
    // init schedules
    if (schedule == kmp_sched_static_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_balanced_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      // round up to make sure the chunk is enough to cover all iterations
      T span = (tripCount + tnum - 1) / tnum;
      // perform chunk adjustment
      chunk = (span + chunk - 1) & ~(chunk - 1);

      T oldUb = ub;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
      if (ub > oldUb)
        ub = oldUb;
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_nochunk) {
      ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
      // save data
      DST->ScheduleType = schedule;
      if (chunk < 1)
        chunk = 1;
      DST->Chunk = chunk;
      DST->LoopUpperBound = ub;
      DST->NextLowerBound = lb;
      __kmpc_barrier(loc, threadId);
      if (tid == 0) {
        Cnt = 0;
        fence::team(__ATOMIC_SEQ_CST);
      }
      __kmpc_barrier(loc, threadId);
    }
  }

  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch next

  static uint64_t NextIter() {
    __kmpc_impl_lanemask_t active = mapping::activemask();
    uint32_t leader = utils::ffs(active) - 1;
    uint32_t change = utils::popc(active);
    __kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
    unsigned int rank = utils::popc(active & lane_mask_lt);
    uint64_t warp_res;
    if (rank == 0) {
      warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
    }
    warp_res = utils::shuffle(active, warp_res, leader);
    return warp_res + rank;
  }
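  // Note: NextIter claims chunk indices with one atomic operation per warp
  // rather than one per lane. The lowest active lane (the leader) adds the
  // number of active lanes to Cnt, the pre-add value is broadcast via the
  // shuffle, and each active lane adds its rank within the active mask, so
  // the lanes of a warp receive consecutive indices.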
  static int DynamicNextChunk(T &lb, T &ub, T chunkSize, T loopLowerBound,
                              T loopUpperBound) {
    T N = NextIter();
    lb = loopLowerBound + N * chunkSize;
    ub = lb + chunkSize - 1; // Clang uses i <= ub

    // 3 result cases:
    //  a. lb and ub < loopUpperBound --> NOT_FINISHED
    //  b. lb < loopUpperBound and ub >= loopUpperBound: last chunk -->
    //     LAST_CHUNK
    //  c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
    // a.
    if (lb <= loopUpperBound && ub < loopUpperBound) {
      return NOT_FINISHED;
    }
    // b.
    if (lb <= loopUpperBound) {
      ub = loopUpperBound;
      return LAST_CHUNK;
    }
    // c. if we are here, we are in case 'c'
    lb = loopUpperBound + 2;
    ub = loopUpperBound + 1;
    return FINISHED;
  }

  static int dispatch_next(IdentTy *loc, int32_t gtid, int32_t *plast,
                           T *plower, T *pupper, ST *pstride,
                           DynamicScheduleTracker *DST) {
    // ID of a thread in its own warp

    // automatically selects thread or warp ID based on selected implementation
    ASSERT0(LT_FUSSY, gtid < omp_get_num_threads(),
            "current thread is not needed here; error");
    // retrieve schedule
    kmp_sched_t schedule = DST->ScheduleType;

    // xxx reduce to one
    if (schedule == kmp_sched_static_chunk ||
        schedule == kmp_sched_static_nochunk) {
      T myLb = DST->NextLowerBound;
      T ub = DST->LoopUpperBound;
      // finished?
      if (myLb > ub) {
        return DISPATCH_FINISHED;
      }
      // not finished, save current bounds
      ST chunk = DST->Chunk;
      *plower = myLb;
      T myUb = myLb + chunk - 1; // Clang uses i <= ub
      if (myUb > ub)
        myUb = ub;
      *pupper = myUb;
      *plast = (int32_t)(myUb == ub);

      // increment next lower bound by the stride
      ST stride = DST->Stride;
      DST->NextLowerBound = myLb + stride;
      return DISPATCH_NOTFINISHED;
    }
    ASSERT0(LT_FUSSY,
            schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
            "bad sched");
    T myLb, myUb;
    int finished = DynamicNextChunk(myLb, myUb, DST->Chunk, DST->NextLowerBound,
                                    DST->LoopUpperBound);

    if (finished == FINISHED)
      return DISPATCH_FINISHED;

    // not finished (either a regular chunk or the last chunk)
    *plast = (int32_t)(finished == LAST_CHUNK);
    *plower = myLb;
    *pupper = myUb;
    *pstride = 1;

    return DISPATCH_NOTFINISHED;
  }

  static void dispatch_fini() {
    // nothing
  }

  ////////////////////////////////////////////////////////////////////////////////
  // end of template class that encapsulates all the helper functions
  ////////////////////////////////////////////////////////////////////////////////
};

////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (dyn loops)
////////////////////////////////////////////////////////////////////////////////

// TODO: This is a stopgap. We probably want to expand the dispatch API to take
//       a DST pointer which can then be allocated properly without malloc.
DynamicScheduleTracker *THREAD_LOCAL(ThreadDSTPtr);
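// The trackers form a per-thread stack so that nested dispatch regions do not
// clobber each other's state: the entry points below pair pushDST() in
// __kmpc_dispatch_init_*, peekDST() in __kmpc_dispatch_next_*, and popDST()
// in __kmpc_dispatch_fini_*.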
// Create a new DST, link the current one, and define the new as current.
static DynamicScheduleTracker *pushDST() {
  DynamicScheduleTracker *NewDST = static_cast<DynamicScheduleTracker *>(
      memory::allocGlobal(sizeof(DynamicScheduleTracker), "new DST"));
  *NewDST = DynamicScheduleTracker({0});
  NewDST->NextDST = ThreadDSTPtr;
  ThreadDSTPtr = NewDST;
  return ThreadDSTPtr;
}

// Return the current DST.
static DynamicScheduleTracker *peekDST() { return ThreadDSTPtr; }

// Pop the current DST and restore the last one.
static void popDST() {
  DynamicScheduleTracker *OldDST = ThreadDSTPtr->NextDST;
  memory::freeGlobal(ThreadDSTPtr, "remove DST");
  ThreadDSTPtr = OldDST;
}

extern "C" {

// init
void __kmpc_dispatch_init_4(IdentTy *loc, int32_t tid, int32_t schedule,
                            int32_t lb, int32_t ub, int32_t st, int32_t chunk) {
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_4u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint32_t lb, uint32_t ub, int32_t st,
                             int32_t chunk) {
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8(IdentTy *loc, int32_t tid, int32_t schedule,
                            int64_t lb, int64_t ub, int64_t st, int64_t chunk) {
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint64_t lb, uint64_t ub, int64_t st,
                             int64_t chunk) {
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

// next
int __kmpc_dispatch_next_4(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_4u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint32_t *p_lb, uint32_t *p_ub, int32_t *p_st) {
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint64_t *p_lb, uint64_t *p_ub, int64_t *p_st) {
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

// fini
void __kmpc_dispatch_fini_4(IdentTy *loc, int32_t tid) {
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_4u(IdentTy *loc, int32_t tid) {
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8(IdentTy *loc, int32_t tid) {
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8u(IdentTy *loc, int32_t tid) {
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
  popDST();
}
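// Illustrative sketch (not verbatim compiler output): a dynamically scheduled
// loop such as `#pragma omp for schedule(dynamic, 4)` over i = 0..99 would be
// driven through this interface roughly as
//   __kmpc_dispatch_init_4(loc, tid, /*schedule=*/kmp_sched_dynamic, 0, 99, 1, 4);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lb, &ub, &st))
//     for (int i = lb; i <= ub; ++i)
//       body(i);
// with __kmpc_dispatch_fini_4 pairing with the init call to pop the tracker.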
////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (static loops)
////////////////////////////////////////////////////////////////////////////////

void __kmpc_for_static_init_4(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int32_t *plower, int32_t *pupper,
                              int32_t *pstride, int32_t incr, int32_t chunk) {
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_4u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint32_t *plower, uint32_t *pupper,
                               int32_t *pstride, int32_t incr, int32_t chunk) {
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int64_t *plower, int64_t *pupper,
                              int64_t *pstride, int64_t incr, int64_t chunk) {
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint64_t *plower, uint64_t *pupper,
                               int64_t *pstride, int64_t incr, int64_t chunk) {
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int32_t *plower, int32_t *pupper,
                                     int32_t *pstride, int32_t incr,
                                     int32_t chunk) {
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint32_t *plower, uint32_t *pupper,
                                      int32_t *pstride, int32_t incr,
                                      int32_t chunk) {
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int64_t *plower, int64_t *pupper,
                                     int64_t *pstride, int64_t incr,
                                     int64_t chunk) {
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint64_t *plower, uint64_t *pupper,
                                      int64_t *pstride, int64_t incr,
                                      int64_t chunk) {
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_fini(IdentTy *loc, int32_t global_tid) {}

void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) {}
}

#pragma omp end declare target