1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals 2; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU 3; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX 4; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED 5; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED 6 7;; void unknown(void); 8;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable"))); 9;; 10;; void sequential_loop() { 11;; #pragma omp target teams 12;; { 13;; for (int i = 0; i < 100; ++i) { 14;; #pragma omp parallel 15;; { 16;; unknown(); 17;; } 18;; } 19;; spmd_amenable(); 20;; } 21;; } 22;; 23;; void use(__attribute__((noescape)) int *) __attribute__((assume("ompx_spmd_amenable"))); 24;; 25;; void sequential_loop_to_stack_var() { 26;; #pragma omp target teams 27;; { 28;; int x; 29;; use(&x); 30;; for (int i = 0; i < 100; ++i) { 31;; #pragma omp parallel 32;; { 33;; unknown(); 34;; } 35;; } 36;; spmd_amenable(); 37;; } 38;; } 39;; 40;; void sequential_loop_to_shared_var() { 41;; #pragma omp target teams 42;; { 43;; int x; 44;; for (int i = 0; i < 100; ++i) { 45;; #pragma omp parallel 46;; { 47;; x++; 48;; unknown(); 49;; } 50;; } 51;; spmd_amenable(); 52;; } 53;; } 54;; 55;; void sequential_loop_to_shared_var_guarded() { 56;; #pragma omp target teams 57;; { 58;; int x = 42; 59;; for (int i = 0; i < 100; ++i) { 60;; #pragma omp parallel 61;; { 62;; x++; 63;; unknown(); 64;; } 65;; } 66;; spmd_amenable(); 67;; } 68;; } 69;; 70;; void do_not_spmdize_target() { 71;; #pragma omp target teams 72;; { 73;; // Incompatible parallel level, called both 74;; // from parallel and target regions 75;; unknown(); 76;; } 77;; } 78;; 79;; void do_not_spmdize_task() { 80;; #pragma omp target 81;; { 82;; #pragma omp task 83;; spmd_amenable(); 84;; #pragma omp parallel 85;; unknown(); 86;; } 87;; } 88 89%struct.ident_t = type { i32, i32, i32, i32, i8* } 90%struct.kmp_task_t_with_privates = type { %struct.kmp_task_t } 91%struct.kmp_task_t = type { i8*, i32 (i32, i8*)*, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t } 92%union.kmp_cmplrdata_t = type { i32 (i32, i8*)* } 93%struct.anon = type {} 94 95@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 96@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 97@__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode = weak constant i8 1 98@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1 99@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1 100@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1 101@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode = weak constant i8 1 102@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode = weak constant i8 1 103@llvm.compiler.used = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 104 105; Function Attrs: alwaysinline convergent norecurse nounwind 106;. 107; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 108; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 109; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 110; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 111; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 112; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 113; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 114; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 115; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 116; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 117; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 118; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 119; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 120;. 121; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 122; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 123; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 124; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 125; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 126; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 127; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 128; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 129; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 130; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 131; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 132; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 133; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 134;. 135; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 136; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 137; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 138; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 139; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 140; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 141; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 142; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 143; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 144; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 145; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 146; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 147; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 148; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 149; AMDGPU-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 150; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 151;. 152; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 153; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 154; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 155; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 156; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 157; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 158; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 159; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 160; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 161; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 162; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 163; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 164; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 165; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 166; NVPTX-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 167; NVPTX-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 168;. 169define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { 170; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 171; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { 172; AMDGPU-NEXT: entry: 173; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 174; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 175; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 176; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 177; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 178; AMDGPU: common.ret: 179; AMDGPU-NEXT: ret void 180; AMDGPU: user_code.entry: 181; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 182; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 183; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 184; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 185; AMDGPU-NEXT: br label [[COMMON_RET]] 186; 187; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 188; NVPTX-SAME: () #[[ATTR0:[0-9]+]] { 189; NVPTX-NEXT: entry: 190; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 191; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 192; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 193; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 194; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 195; NVPTX: common.ret: 196; NVPTX-NEXT: ret void 197; NVPTX: user_code.entry: 198; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 199; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 200; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 201; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 202; NVPTX-NEXT: br label [[COMMON_RET]] 203; 204; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 205; AMDGPU-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { 206; AMDGPU-DISABLED-NEXT: entry: 207; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 208; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 209; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 210; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 211; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 212; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 213; AMDGPU-DISABLED: is_worker_check: 214; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 215; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 216; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 217; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 218; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 219; AMDGPU-DISABLED: worker_state_machine.begin: 220; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 221; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 222; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 223; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 224; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 225; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 226; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 227; AMDGPU-DISABLED: worker_state_machine.finished: 228; AMDGPU-DISABLED-NEXT: ret void 229; AMDGPU-DISABLED: worker_state_machine.is_active.check: 230; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 231; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 232; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) 233; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 234; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 235; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 236; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 237; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 238; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 239; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 240; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 241; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 242; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 243; AMDGPU-DISABLED: worker_state_machine.done.barrier: 244; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 245; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 246; AMDGPU-DISABLED: thread.user_code.check: 247; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 248; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 249; AMDGPU-DISABLED: common.ret: 250; AMDGPU-DISABLED-NEXT: ret void 251; AMDGPU-DISABLED: user_code.entry: 252; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 253; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 254; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 255; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 256; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 257; 258; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 259; NVPTX-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { 260; NVPTX-DISABLED-NEXT: entry: 261; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 262; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 263; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 264; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 265; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 266; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 267; NVPTX-DISABLED: is_worker_check: 268; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 269; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 270; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 271; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 272; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 273; NVPTX-DISABLED: worker_state_machine.begin: 274; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 275; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 276; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 277; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 278; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 279; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 280; NVPTX-DISABLED: worker_state_machine.finished: 281; NVPTX-DISABLED-NEXT: ret void 282; NVPTX-DISABLED: worker_state_machine.is_active.check: 283; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 284; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 285; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) 286; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 287; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 288; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 289; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 290; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 291; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 292; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 293; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 294; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 295; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 296; NVPTX-DISABLED: worker_state_machine.done.barrier: 297; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 298; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 299; NVPTX-DISABLED: thread.user_code.check: 300; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 301; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 302; NVPTX-DISABLED: common.ret: 303; NVPTX-DISABLED-NEXT: ret void 304; NVPTX-DISABLED: user_code.entry: 305; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 306; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 307; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 308; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 309; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 310; 311entry: 312 %.zero.addr = alloca i32, align 4 313 %.threadid_temp. = alloca i32, align 4 314 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 315 %exec_user_code = icmp eq i32 %0, -1 316 br i1 %exec_user_code, label %user_code.entry, label %common.ret 317 318common.ret: ; preds = %entry, %user_code.entry 319 ret void 320 321user_code.entry: ; preds = %entry 322 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 323 store i32 0, i32* %.zero.addr, align 4 324 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 325 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #6 326 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 327 br label %common.ret 328} 329 330; Function Attrs: alwaysinline convergent norecurse nounwind 331define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 332; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ 333; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 334; AMDGPU-NEXT: entry: 335; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 336; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 337; AMDGPU: for.cond: 338; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 339; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 340; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 341; AMDGPU: for.cond.cleanup: 342; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 343; AMDGPU-NEXT: ret void 344; AMDGPU: for.body: 345; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 346; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 347; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0) 348; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 349; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 350; 351; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ 352; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 353; NVPTX-NEXT: entry: 354; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 355; NVPTX-NEXT: br label [[FOR_COND:%.*]] 356; NVPTX: for.cond: 357; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 358; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 359; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 360; NVPTX: for.cond.cleanup: 361; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 362; NVPTX-NEXT: ret void 363; NVPTX: for.body: 364; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 365; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 366; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0) 367; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 368; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 369; 370; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ 371; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 372; AMDGPU-DISABLED-NEXT: entry: 373; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 374; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] 375; AMDGPU-DISABLED: for.cond: 376; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 377; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 378; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 379; AMDGPU-DISABLED: for.cond.cleanup: 380; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 381; AMDGPU-DISABLED-NEXT: ret void 382; AMDGPU-DISABLED: for.body: 383; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 384; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 385; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0) 386; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 387; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 388; 389; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ 390; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 391; NVPTX-DISABLED-NEXT: entry: 392; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 393; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] 394; NVPTX-DISABLED: for.cond: 395; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 396; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 397; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 398; NVPTX-DISABLED: for.cond.cleanup: 399; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 400; NVPTX-DISABLED-NEXT: ret void 401; NVPTX-DISABLED: for.body: 402; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 403; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 404; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0) 405; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 406; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 407; 408entry: 409 %captured_vars_addrs = alloca [0 x i8*], align 8 410 br label %for.cond 411 412for.cond: ; preds = %for.body, %entry 413 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 414 %cmp = icmp slt i32 %i.0, 100 415 br i1 %cmp, label %for.body, label %for.cond.cleanup 416 417for.cond.cleanup: ; preds = %for.cond 418 call void @spmd_amenable() #10 419 ret void 420 421for.body: ; preds = %for.cond 422 %0 = load i32, i32* %.global_tid., align 4, !tbaa !18 423 %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** 424 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %1, i64 0) 425 %inc = add nsw i32 %i.0, 1 426 br label %for.cond, !llvm.loop !22 427} 428 429; Function Attrs: alwaysinline convergent norecurse nounwind 430define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 431; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 432; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 433; AMDGPU-NEXT: entry: 434; AMDGPU-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 435; AMDGPU-NEXT: ret void 436; 437; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 438; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 439; NVPTX-NEXT: entry: 440; NVPTX-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 441; NVPTX-NEXT: ret void 442; 443; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 444; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 445; AMDGPU-DISABLED-NEXT: entry: 446; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 447; AMDGPU-DISABLED-NEXT: ret void 448; 449; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 450; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 451; NVPTX-DISABLED-NEXT: entry: 452; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 453; NVPTX-DISABLED-NEXT: ret void 454; 455entry: 456 call void @unknown() #11 457 ret void 458} 459 460; Function Attrs: convergent norecurse nounwind 461define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { 462; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 463; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 464; AMDGPU-NEXT: entry: 465; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 466; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 467; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 468; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 469; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 470; AMDGPU-NEXT: ret void 471; 472; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 473; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 474; NVPTX-NEXT: entry: 475; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 476; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 477; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 478; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 479; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 480; NVPTX-NEXT: ret void 481; 482; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 483; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 484; AMDGPU-DISABLED-NEXT: entry: 485; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 486; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 487; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 488; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 489; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 490; AMDGPU-DISABLED-NEXT: ret void 491; 492; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 493; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 494; NVPTX-DISABLED-NEXT: entry: 495; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 496; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 497; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 498; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 499; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 500; NVPTX-DISABLED-NEXT: ret void 501; 502entry: 503 %.addr1 = alloca i32, align 4 504 %.zero.addr = alloca i32, align 4 505 %global_args = alloca i8**, align 8 506 store i32 %1, i32* %.addr1, align 4, !tbaa !18 507 store i32 0, i32* %.zero.addr, align 4 508 call void @__kmpc_get_shared_variables(i8*** %global_args) 509 call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) #6 510 ret void 511} 512 513; Function Attrs: alwaysinline convergent norecurse nounwind 514define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 { 515; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 516; AMDGPU-SAME: () #[[ATTR0]] { 517; AMDGPU-NEXT: entry: 518; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 519; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 520; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 521; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 522; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 523; AMDGPU: common.ret: 524; AMDGPU-NEXT: ret void 525; AMDGPU: user_code.entry: 526; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 527; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 528; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 529; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 530; AMDGPU-NEXT: br label [[COMMON_RET]] 531; 532; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 533; NVPTX-SAME: () #[[ATTR0]] { 534; NVPTX-NEXT: entry: 535; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 536; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 537; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 538; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 539; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 540; NVPTX: common.ret: 541; NVPTX-NEXT: ret void 542; NVPTX: user_code.entry: 543; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 544; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 545; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 546; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 547; NVPTX-NEXT: br label [[COMMON_RET]] 548; 549; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 550; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 551; AMDGPU-DISABLED-NEXT: entry: 552; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 553; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 554; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 555; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 556; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 557; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 558; AMDGPU-DISABLED: is_worker_check: 559; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 560; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 561; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 562; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 563; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 564; AMDGPU-DISABLED: worker_state_machine.begin: 565; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 566; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 567; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 568; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 569; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 570; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 571; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 572; AMDGPU-DISABLED: worker_state_machine.finished: 573; AMDGPU-DISABLED-NEXT: ret void 574; AMDGPU-DISABLED: worker_state_machine.is_active.check: 575; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 576; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 577; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*) 578; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 579; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 580; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 581; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 582; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 583; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 584; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 585; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 586; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 587; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 588; AMDGPU-DISABLED: worker_state_machine.done.barrier: 589; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 590; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 591; AMDGPU-DISABLED: thread.user_code.check: 592; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 593; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 594; AMDGPU-DISABLED: common.ret: 595; AMDGPU-DISABLED-NEXT: ret void 596; AMDGPU-DISABLED: user_code.entry: 597; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 598; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 599; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 600; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 601; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 602; 603; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 604; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 605; NVPTX-DISABLED-NEXT: entry: 606; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 607; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 608; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 609; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 610; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 611; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 612; NVPTX-DISABLED: is_worker_check: 613; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 614; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 615; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 616; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 617; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 618; NVPTX-DISABLED: worker_state_machine.begin: 619; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 620; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 621; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 622; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 623; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 624; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 625; NVPTX-DISABLED: worker_state_machine.finished: 626; NVPTX-DISABLED-NEXT: ret void 627; NVPTX-DISABLED: worker_state_machine.is_active.check: 628; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 629; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 630; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*) 631; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 632; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 633; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 634; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 635; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 636; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 637; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 638; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 639; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 640; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 641; NVPTX-DISABLED: worker_state_machine.done.barrier: 642; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 643; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 644; NVPTX-DISABLED: thread.user_code.check: 645; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 646; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 647; NVPTX-DISABLED: common.ret: 648; NVPTX-DISABLED-NEXT: ret void 649; NVPTX-DISABLED: user_code.entry: 650; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 651; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 652; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 653; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 654; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 655; 656entry: 657 %.zero.addr = alloca i32, align 4 658 %.threadid_temp. = alloca i32, align 4 659 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 660 %exec_user_code = icmp eq i32 %0, -1 661 br i1 %exec_user_code, label %user_code.entry, label %common.ret 662 663common.ret: ; preds = %entry, %user_code.entry 664 ret void 665 666user_code.entry: ; preds = %entry 667 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 668 store i32 0, i32* %.zero.addr, align 4 669 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 670 call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #6 671 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 672 br label %common.ret 673} 674 675; Function Attrs: alwaysinline convergent norecurse nounwind 676define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 677; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 678; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 679; AMDGPU-NEXT: entry: 680; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 681; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) 682; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* 683; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* 684; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 685; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 686; AMDGPU: for.cond: 687; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 688; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 689; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 690; AMDGPU: for.cond.cleanup: 691; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 692; AMDGPU-NEXT: ret void 693; AMDGPU: for.body: 694; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 695; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 696; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) 697; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 698; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 699; 700; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 701; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 702; NVPTX-NEXT: entry: 703; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 704; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 705; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* 706; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 707; NVPTX-NEXT: br label [[FOR_COND:%.*]] 708; NVPTX: for.cond: 709; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 710; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 711; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 712; NVPTX: for.cond.cleanup: 713; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 714; NVPTX-NEXT: ret void 715; NVPTX: for.body: 716; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 717; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 718; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) 719; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 720; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 721; 722; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 723; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 724; AMDGPU-DISABLED-NEXT: entry: 725; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 726; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) 727; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* 728; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* 729; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 730; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] 731; AMDGPU-DISABLED: for.cond: 732; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 733; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 734; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 735; AMDGPU-DISABLED: for.cond.cleanup: 736; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 737; AMDGPU-DISABLED-NEXT: ret void 738; AMDGPU-DISABLED: for.body: 739; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 740; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 741; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) 742; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 743; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 744; 745; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 746; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 747; NVPTX-DISABLED-NEXT: entry: 748; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 749; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 750; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* 751; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 752; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] 753; NVPTX-DISABLED: for.cond: 754; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 755; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 756; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 757; NVPTX-DISABLED: for.cond.cleanup: 758; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 759; NVPTX-DISABLED-NEXT: ret void 760; NVPTX-DISABLED: for.body: 761; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 762; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 763; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) 764; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 765; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 766; 767entry: 768 %captured_vars_addrs = alloca [0 x i8*], align 8 769 %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) 770 %x_on_stack = bitcast i8* %x to i32* 771 call void @use(i32* nocapture %x_on_stack) #10 772 br label %for.cond 773 774for.cond: ; preds = %for.body, %entry 775 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 776 %cmp = icmp slt i32 %i.0, 100 777 br i1 %cmp, label %for.body, label %for.cond.cleanup 778 779for.cond.cleanup: ; preds = %for.cond 780 call void @spmd_amenable() #10 781 call void @__kmpc_free_shared(i8* %x, i64 4) 782 ret void 783 784for.body: ; preds = %for.cond 785 %0 = load i32, i32* %.global_tid., align 4, !tbaa !18 786 %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** 787 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0) 788 %inc = add nsw i32 %i.0, 1 789 br label %for.cond, !llvm.loop !25 790} 791; Function Attrs: alwaysinline convergent norecurse nounwind 792define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 793; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 794; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 795; AMDGPU-NEXT: entry: 796; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 797; AMDGPU-NEXT: ret void 798; 799; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 800; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 801; NVPTX-NEXT: entry: 802; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 803; NVPTX-NEXT: ret void 804; 805; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 806; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 807; AMDGPU-DISABLED-NEXT: entry: 808; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 809; AMDGPU-DISABLED-NEXT: ret void 810; 811; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 812; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 813; NVPTX-DISABLED-NEXT: entry: 814; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 815; NVPTX-DISABLED-NEXT: ret void 816; 817entry: 818 call void @unknown() #11 819 ret void 820} 821 822; Function Attrs: convergent norecurse nounwind 823define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { 824; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 825; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 826; AMDGPU-NEXT: entry: 827; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 828; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 829; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 830; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 831; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 832; AMDGPU-NEXT: ret void 833; 834; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 835; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 836; NVPTX-NEXT: entry: 837; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 838; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 839; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 840; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 841; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 842; NVPTX-NEXT: ret void 843; 844; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 845; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 846; AMDGPU-DISABLED-NEXT: entry: 847; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 848; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 849; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 850; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 851; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 852; AMDGPU-DISABLED-NEXT: ret void 853; 854; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 855; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 856; NVPTX-DISABLED-NEXT: entry: 857; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 858; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 859; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 860; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 861; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 862; NVPTX-DISABLED-NEXT: ret void 863; 864entry: 865 %.addr1 = alloca i32, align 4 866 %.zero.addr = alloca i32, align 4 867 %global_args = alloca i8**, align 8 868 store i32 %1, i32* %.addr1, align 4, !tbaa !18 869 store i32 0, i32* %.zero.addr, align 4 870 call void @__kmpc_get_shared_variables(i8*** %global_args) 871 call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #6 872 ret void 873} 874 875 876; Function Attrs: alwaysinline convergent norecurse nounwind 877define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 { 878; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 879; AMDGPU-SAME: () #[[ATTR0]] { 880; AMDGPU-NEXT: entry: 881; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 882; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 883; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 884; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 885; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 886; AMDGPU: common.ret: 887; AMDGPU-NEXT: ret void 888; AMDGPU: user_code.entry: 889; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 890; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 891; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 892; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 893; AMDGPU-NEXT: br label [[COMMON_RET]] 894; 895; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 896; NVPTX-SAME: () #[[ATTR0]] { 897; NVPTX-NEXT: entry: 898; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 899; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 900; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 901; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 902; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 903; NVPTX: common.ret: 904; NVPTX-NEXT: ret void 905; NVPTX: user_code.entry: 906; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 907; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 908; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 909; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 910; NVPTX-NEXT: br label [[COMMON_RET]] 911; 912; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 913; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 914; AMDGPU-DISABLED-NEXT: entry: 915; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 916; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 917; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 918; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 919; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 920; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 921; AMDGPU-DISABLED: is_worker_check: 922; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 923; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 924; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 925; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 926; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 927; AMDGPU-DISABLED: worker_state_machine.begin: 928; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 929; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 930; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 931; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 932; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 933; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 934; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 935; AMDGPU-DISABLED: worker_state_machine.finished: 936; AMDGPU-DISABLED-NEXT: ret void 937; AMDGPU-DISABLED: worker_state_machine.is_active.check: 938; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 939; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 940; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) 941; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 942; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 943; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 944; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 945; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 946; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 947; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 948; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 949; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 950; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 951; AMDGPU-DISABLED: worker_state_machine.done.barrier: 952; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 953; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 954; AMDGPU-DISABLED: thread.user_code.check: 955; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 956; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 957; AMDGPU-DISABLED: common.ret: 958; AMDGPU-DISABLED-NEXT: ret void 959; AMDGPU-DISABLED: user_code.entry: 960; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 961; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 962; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 963; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 964; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 965; 966; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 967; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 968; NVPTX-DISABLED-NEXT: entry: 969; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 970; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 971; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 972; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 973; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 974; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 975; NVPTX-DISABLED: is_worker_check: 976; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 977; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 978; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 979; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 980; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 981; NVPTX-DISABLED: worker_state_machine.begin: 982; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 983; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 984; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 985; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 986; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 987; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 988; NVPTX-DISABLED: worker_state_machine.finished: 989; NVPTX-DISABLED-NEXT: ret void 990; NVPTX-DISABLED: worker_state_machine.is_active.check: 991; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 992; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 993; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) 994; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 995; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 996; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 997; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 998; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 999; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1000; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1001; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 1002; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1003; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1004; NVPTX-DISABLED: worker_state_machine.done.barrier: 1005; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1006; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1007; NVPTX-DISABLED: thread.user_code.check: 1008; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1009; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1010; NVPTX-DISABLED: common.ret: 1011; NVPTX-DISABLED-NEXT: ret void 1012; NVPTX-DISABLED: user_code.entry: 1013; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1014; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1015; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1016; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1017; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 1018; 1019entry: 1020 %.zero.addr = alloca i32, align 4 1021 %.threadid_temp. = alloca i32, align 4 1022 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 1023 %exec_user_code = icmp eq i32 %0, -1 1024 br i1 %exec_user_code, label %user_code.entry, label %common.ret 1025 1026common.ret: ; preds = %entry, %user_code.entry 1027 ret void 1028 1029user_code.entry: ; preds = %entry 1030 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 1031 store i32 0, i32* %.zero.addr, align 4 1032 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 1033 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #6 1034 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 1035 br label %common.ret 1036} 1037 1038; Function Attrs: alwaysinline convergent norecurse nounwind 1039define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 1040; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 1041; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1042; AMDGPU-NEXT: entry: 1043; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1044; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 1045; AMDGPU: for.cond: 1046; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1047; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1048; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1049; AMDGPU: for.cond.cleanup: 1050; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 1051; AMDGPU-NEXT: ret void 1052; AMDGPU: for.body: 1053; AMDGPU-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1054; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1055; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1056; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1057; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1) 1058; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1059; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1060; 1061; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 1062; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1063; NVPTX-NEXT: entry: 1064; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1065; NVPTX-NEXT: br label [[FOR_COND:%.*]] 1066; NVPTX: for.cond: 1067; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1068; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1069; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1070; NVPTX: for.cond.cleanup: 1071; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 1072; NVPTX-NEXT: ret void 1073; NVPTX: for.body: 1074; NVPTX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1075; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1076; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1077; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1078; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1) 1079; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1080; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1081; 1082; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 1083; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1084; AMDGPU-DISABLED-NEXT: entry: 1085; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1086; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] 1087; AMDGPU-DISABLED: for.cond: 1088; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1089; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1090; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1091; AMDGPU-DISABLED: for.cond.cleanup: 1092; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1093; AMDGPU-DISABLED-NEXT: ret void 1094; AMDGPU-DISABLED: for.body: 1095; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1096; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1097; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1098; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1099; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1) 1100; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1101; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1102; 1103; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 1104; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1105; NVPTX-DISABLED-NEXT: entry: 1106; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1107; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] 1108; NVPTX-DISABLED: for.cond: 1109; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1110; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1111; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1112; NVPTX-DISABLED: for.cond.cleanup: 1113; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1114; NVPTX-DISABLED-NEXT: ret void 1115; NVPTX-DISABLED: for.body: 1116; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1117; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1118; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1119; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1120; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1) 1121; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1122; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1123; 1124entry: 1125 %captured_vars_addrs = alloca [1 x i8*], align 8 1126 %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) 1127 %x_on_stack = bitcast i8* %x to i32* 1128 br label %for.cond 1129 1130for.cond: ; preds = %for.body, %entry 1131 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 1132 %cmp = icmp slt i32 %i.0, 100 1133 br i1 %cmp, label %for.body, label %for.cond.cleanup 1134 1135for.cond.cleanup: ; preds = %for.cond 1136 call void @spmd_amenable() #10 1137 call void @__kmpc_free_shared(i8* %x, i64 4) 1138 ret void 1139 1140for.body: ; preds = %for.cond 1141 %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0 1142 store i8* %x, i8** %0, align 8, !tbaa !26 1143 %1 = load i32, i32* %.global_tid., align 4, !tbaa !18 1144 %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8** 1145 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 1) 1146 %inc = add nsw i32 %i.0, 1 1147 br label %for.cond, !llvm.loop !28 1148} 1149 1150; Function Attrs: alwaysinline convergent norecurse nounwind 1151define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { 1152; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 1153; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1154; AMDGPU-NEXT: entry: 1155; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1156; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1157; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1158; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 1159; AMDGPU-NEXT: ret void 1160; 1161; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 1162; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1163; NVPTX-NEXT: entry: 1164; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1165; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1166; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1167; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 1168; NVPTX-NEXT: ret void 1169; 1170; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 1171; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1172; AMDGPU-DISABLED-NEXT: entry: 1173; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1174; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1175; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1176; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1177; AMDGPU-DISABLED-NEXT: ret void 1178; 1179; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 1180; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1181; NVPTX-DISABLED-NEXT: entry: 1182; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1183; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1184; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1185; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1186; NVPTX-DISABLED-NEXT: ret void 1187; 1188entry: 1189 %0 = load i32, i32* %x, align 4, !tbaa !18 1190 %inc = add nsw i32 %0, 1 1191 store i32 %inc, i32* %x, align 4, !tbaa !18 1192 call void @unknown() #11 1193 ret void 1194} 1195 1196; Function Attrs: convergent norecurse nounwind 1197define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { 1198; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1199; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1200; AMDGPU-NEXT: entry: 1201; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1202; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1203; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1204; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1205; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1206; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1207; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1208; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1209; AMDGPU-NEXT: ret void 1210; 1211; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1212; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1213; NVPTX-NEXT: entry: 1214; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1215; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1216; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1217; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1218; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1219; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1220; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1221; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1222; NVPTX-NEXT: ret void 1223; 1224; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1225; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1226; AMDGPU-DISABLED-NEXT: entry: 1227; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1228; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1229; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1230; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1231; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1232; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1233; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1234; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1235; AMDGPU-DISABLED-NEXT: ret void 1236; 1237; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1238; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1239; NVPTX-DISABLED-NEXT: entry: 1240; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1241; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1242; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1243; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1244; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1245; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1246; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1247; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1248; NVPTX-DISABLED-NEXT: ret void 1249; 1250entry: 1251 %.addr1 = alloca i32, align 4 1252 %.zero.addr = alloca i32, align 4 1253 %global_args = alloca i8**, align 8 1254 store i32 %1, i32* %.addr1, align 4, !tbaa !18 1255 store i32 0, i32* %.zero.addr, align 4 1256 call void @__kmpc_get_shared_variables(i8*** %global_args) 1257 %2 = load i8**, i8*** %global_args, align 8 1258 %3 = bitcast i8** %2 to i32** 1259 %4 = load i32*, i32** %3, align 8, !tbaa !26 1260 call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr, i32* %4) #6 1261 ret void 1262} 1263 1264; Function Attrs: alwaysinline convergent norecurse nounwind 1265define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 { 1266; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1267; AMDGPU-SAME: () #[[ATTR0]] { 1268; AMDGPU-NEXT: entry: 1269; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1270; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1271; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 1272; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1273; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1274; AMDGPU: common.ret: 1275; AMDGPU-NEXT: ret void 1276; AMDGPU: user_code.entry: 1277; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1278; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1279; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1280; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 1281; AMDGPU-NEXT: br label [[COMMON_RET]] 1282; 1283; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1284; NVPTX-SAME: () #[[ATTR0]] { 1285; NVPTX-NEXT: entry: 1286; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1287; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1288; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 1289; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1290; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1291; NVPTX: common.ret: 1292; NVPTX-NEXT: ret void 1293; NVPTX: user_code.entry: 1294; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1295; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1296; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1297; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 1298; NVPTX-NEXT: br label [[COMMON_RET]] 1299; 1300; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1301; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 1302; AMDGPU-DISABLED-NEXT: entry: 1303; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1304; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1305; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1306; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1307; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1308; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1309; AMDGPU-DISABLED: is_worker_check: 1310; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1311; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1312; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1313; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1314; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1315; AMDGPU-DISABLED: worker_state_machine.begin: 1316; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1317; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1318; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1319; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1320; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1321; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1322; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1323; AMDGPU-DISABLED: worker_state_machine.finished: 1324; AMDGPU-DISABLED-NEXT: ret void 1325; AMDGPU-DISABLED: worker_state_machine.is_active.check: 1326; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1327; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 1328; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) 1329; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1330; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 1331; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1332; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1333; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1334; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1335; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1336; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 1337; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1338; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1339; AMDGPU-DISABLED: worker_state_machine.done.barrier: 1340; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1341; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1342; AMDGPU-DISABLED: thread.user_code.check: 1343; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1344; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1345; AMDGPU-DISABLED: common.ret: 1346; AMDGPU-DISABLED-NEXT: ret void 1347; AMDGPU-DISABLED: user_code.entry: 1348; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1349; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1350; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1351; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1352; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 1353; 1354; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1355; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 1356; NVPTX-DISABLED-NEXT: entry: 1357; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1358; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1359; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1360; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1361; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1362; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1363; NVPTX-DISABLED: is_worker_check: 1364; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1365; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1366; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1367; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1368; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1369; NVPTX-DISABLED: worker_state_machine.begin: 1370; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1371; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1372; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1373; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1374; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1375; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1376; NVPTX-DISABLED: worker_state_machine.finished: 1377; NVPTX-DISABLED-NEXT: ret void 1378; NVPTX-DISABLED: worker_state_machine.is_active.check: 1379; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1380; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 1381; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) 1382; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1383; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 1384; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1385; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1386; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1387; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1388; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1389; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 1390; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1391; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1392; NVPTX-DISABLED: worker_state_machine.done.barrier: 1393; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1394; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1395; NVPTX-DISABLED: thread.user_code.check: 1396; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1397; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1398; NVPTX-DISABLED: common.ret: 1399; NVPTX-DISABLED-NEXT: ret void 1400; NVPTX-DISABLED: user_code.entry: 1401; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1402; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1403; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1404; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1405; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 1406; 1407entry: 1408 %.zero.addr = alloca i32, align 4 1409 %.threadid_temp. = alloca i32, align 4 1410 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 1411 %exec_user_code = icmp eq i32 %0, -1 1412 br i1 %exec_user_code, label %user_code.entry, label %common.ret 1413 1414common.ret: ; preds = %entry, %user_code.entry 1415 ret void 1416 1417user_code.entry: ; preds = %entry 1418 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 1419 store i32 0, i32* %.zero.addr, align 4 1420 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 1421 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #6 1422 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 1423 br label %common.ret 1424} 1425 1426; Function Attrs: alwaysinline convergent norecurse nounwind 1427define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 1428; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 1429; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1430; AMDGPU-NEXT: entry: 1431; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1432; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32* 1433; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]] 1434; AMDGPU: region.check.tid: 1435; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() 1436; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 1437; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] 1438; AMDGPU: region.guarded: 1439; AMDGPU-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 1440; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]] 1441; AMDGPU: region.guarded.end: 1442; AMDGPU-NEXT: br label [[REGION_BARRIER]] 1443; AMDGPU: region.barrier: 1444; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) 1445; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] 1446; AMDGPU: region.exit: 1447; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 1448; AMDGPU: for.cond: 1449; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1450; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1451; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1452; AMDGPU: for.cond.cleanup: 1453; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 1454; AMDGPU-NEXT: ret void 1455; AMDGPU: for.body: 1456; AMDGPU-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1457; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] 1458; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1459; AMDGPU-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1460; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1) 1461; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1462; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1463; 1464; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 1465; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1466; NVPTX-NEXT: entry: 1467; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1468; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32* 1469; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]] 1470; NVPTX: region.check.tid: 1471; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block() 1472; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 1473; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] 1474; NVPTX: region.guarded: 1475; NVPTX-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 1476; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]] 1477; NVPTX: region.guarded.end: 1478; NVPTX-NEXT: br label [[REGION_BARRIER]] 1479; NVPTX: region.barrier: 1480; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) 1481; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] 1482; NVPTX: region.exit: 1483; NVPTX-NEXT: br label [[FOR_COND:%.*]] 1484; NVPTX: for.cond: 1485; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1486; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1487; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1488; NVPTX: for.cond.cleanup: 1489; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 1490; NVPTX-NEXT: ret void 1491; NVPTX: for.body: 1492; NVPTX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1493; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] 1494; NVPTX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1495; NVPTX-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1496; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1) 1497; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1498; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1499; 1500; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 1501; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1502; AMDGPU-DISABLED-NEXT: entry: 1503; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1504; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32* 1505; AMDGPU-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 1506; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] 1507; AMDGPU-DISABLED: for.cond: 1508; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1509; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1510; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1511; AMDGPU-DISABLED: for.cond.cleanup: 1512; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1513; AMDGPU-DISABLED-NEXT: ret void 1514; AMDGPU-DISABLED: for.body: 1515; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1516; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]] 1517; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1518; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1519; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1) 1520; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1521; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1522; 1523; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 1524; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1525; NVPTX-DISABLED-NEXT: entry: 1526; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1527; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32* 1528; NVPTX-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 1529; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] 1530; NVPTX-DISABLED: for.cond: 1531; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1532; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1533; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1534; NVPTX-DISABLED: for.cond.cleanup: 1535; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1536; NVPTX-DISABLED-NEXT: ret void 1537; NVPTX-DISABLED: for.body: 1538; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1539; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]] 1540; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1541; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1542; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1) 1543; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1544; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1545; 1546entry: 1547 %captured_vars_addrs = alloca [1 x i8*], align 8 1548 %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) 1549 %x_on_stack = bitcast i8* %x to i32* 1550 store i32 42, i32* %x_on_stack, align 4, !tbaa !18 1551 br label %for.cond 1552 1553for.cond: ; preds = %for.body, %entry 1554 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 1555 %cmp = icmp slt i32 %i.0, 100 1556 br i1 %cmp, label %for.body, label %for.cond.cleanup 1557 1558for.cond.cleanup: ; preds = %for.cond 1559 call void @spmd_amenable() #10 1560 call void @__kmpc_free_shared(i8* %x, i64 4) 1561 ret void 1562 1563for.body: ; preds = %for.cond 1564 %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0 1565 store i8* %x, i8** %0, align 8, !tbaa !26 1566 %1 = load i32, i32* %.global_tid., align 4, !tbaa !18 1567 %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8** 1568 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 1) 1569 %inc = add nsw i32 %i.0, 1 1570 br label %for.cond, !llvm.loop !29 1571} 1572 1573; Function Attrs: alwaysinline convergent norecurse nounwind 1574define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { 1575; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 1576; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1577; AMDGPU-NEXT: entry: 1578; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1579; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1580; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1581; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 1582; AMDGPU-NEXT: ret void 1583; 1584; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 1585; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1586; NVPTX-NEXT: entry: 1587; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1588; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1589; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1590; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 1591; NVPTX-NEXT: ret void 1592; 1593; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 1594; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1595; AMDGPU-DISABLED-NEXT: entry: 1596; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1597; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1598; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1599; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1600; AMDGPU-DISABLED-NEXT: ret void 1601; 1602; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 1603; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1604; NVPTX-DISABLED-NEXT: entry: 1605; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1606; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1607; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1608; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1609; NVPTX-DISABLED-NEXT: ret void 1610; 1611entry: 1612 %0 = load i32, i32* %x, align 4, !tbaa !18 1613 %inc = add nsw i32 %0, 1 1614 store i32 %inc, i32* %x, align 4, !tbaa !18 1615 call void @unknown() #11 1616 ret void 1617} 1618 1619; Function Attrs: convergent norecurse nounwind 1620define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { 1621; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1622; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1623; AMDGPU-NEXT: entry: 1624; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1625; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1626; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1627; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1628; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1629; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1630; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1631; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1632; AMDGPU-NEXT: ret void 1633; 1634; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1635; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1636; NVPTX-NEXT: entry: 1637; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1638; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1639; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1640; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1641; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1642; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1643; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1644; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1645; NVPTX-NEXT: ret void 1646; 1647; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1648; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1649; AMDGPU-DISABLED-NEXT: entry: 1650; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1651; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1652; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1653; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1654; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1655; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1656; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1657; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1658; AMDGPU-DISABLED-NEXT: ret void 1659; 1660; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1661; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1662; NVPTX-DISABLED-NEXT: entry: 1663; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1664; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1665; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1666; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1667; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1668; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1669; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1670; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1671; NVPTX-DISABLED-NEXT: ret void 1672; 1673entry: 1674 %.addr1 = alloca i32, align 4 1675 %.zero.addr = alloca i32, align 4 1676 %global_args = alloca i8**, align 8 1677 store i32 %1, i32* %.addr1, align 4, !tbaa !18 1678 store i32 0, i32* %.zero.addr, align 4 1679 call void @__kmpc_get_shared_variables(i8*** %global_args) 1680 %2 = load i8**, i8*** %global_args, align 8 1681 %3 = bitcast i8** %2 to i32** 1682 %4 = load i32*, i32** %3, align 8, !tbaa !26 1683 call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr, i32* %4) #6 1684 ret void 1685} 1686 1687; Function Attrs: alwaysinline convergent norecurse nounwind 1688define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 { 1689; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1690; AMDGPU-SAME: () #[[ATTR0]] { 1691; AMDGPU-NEXT: entry: 1692; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1693; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1694; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1695; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1696; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1697; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1698; AMDGPU: is_worker_check: 1699; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1700; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1701; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1702; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1703; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1704; AMDGPU: worker_state_machine.begin: 1705; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1706; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1707; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1708; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1709; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1710; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1711; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1712; AMDGPU: worker_state_machine.finished: 1713; AMDGPU-NEXT: ret void 1714; AMDGPU: worker_state_machine.is_active.check: 1715; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1716; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 1717; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1718; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1719; AMDGPU: worker_state_machine.parallel_region.end: 1720; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1721; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1722; AMDGPU: worker_state_machine.done.barrier: 1723; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1724; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1725; AMDGPU: thread.user_code.check: 1726; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1727; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1728; AMDGPU: common.ret: 1729; AMDGPU-NEXT: ret void 1730; AMDGPU: user_code.entry: 1731; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1732; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1733; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1734; AMDGPU-NEXT: br label [[COMMON_RET]] 1735; 1736; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1737; NVPTX-SAME: () #[[ATTR0]] { 1738; NVPTX-NEXT: entry: 1739; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1740; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1741; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1742; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1743; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1744; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1745; NVPTX: is_worker_check: 1746; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1747; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1748; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1749; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1750; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1751; NVPTX: worker_state_machine.begin: 1752; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1753; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1754; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1755; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1756; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1757; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1758; NVPTX: worker_state_machine.finished: 1759; NVPTX-NEXT: ret void 1760; NVPTX: worker_state_machine.is_active.check: 1761; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1762; NVPTX: worker_state_machine.parallel_region.fallback.execute: 1763; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1764; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1765; NVPTX: worker_state_machine.parallel_region.end: 1766; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 1767; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1768; NVPTX: worker_state_machine.done.barrier: 1769; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1770; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1771; NVPTX: thread.user_code.check: 1772; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1773; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1774; NVPTX: common.ret: 1775; NVPTX-NEXT: ret void 1776; NVPTX: user_code.entry: 1777; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1778; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1779; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1780; NVPTX-NEXT: br label [[COMMON_RET]] 1781; 1782; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1783; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 1784; AMDGPU-DISABLED-NEXT: entry: 1785; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1786; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1787; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1788; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1789; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1790; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1791; AMDGPU-DISABLED: is_worker_check: 1792; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1793; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1794; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1795; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1796; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1797; AMDGPU-DISABLED: worker_state_machine.begin: 1798; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1799; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1800; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1801; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1802; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1803; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1804; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1805; AMDGPU-DISABLED: worker_state_machine.finished: 1806; AMDGPU-DISABLED-NEXT: ret void 1807; AMDGPU-DISABLED: worker_state_machine.is_active.check: 1808; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1809; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1810; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1811; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1812; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 1813; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1814; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1815; AMDGPU-DISABLED: worker_state_machine.done.barrier: 1816; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1817; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1818; AMDGPU-DISABLED: thread.user_code.check: 1819; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1820; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1821; AMDGPU-DISABLED: common.ret: 1822; AMDGPU-DISABLED-NEXT: ret void 1823; AMDGPU-DISABLED: user_code.entry: 1824; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1825; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1826; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1827; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 1828; 1829; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1830; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 1831; NVPTX-DISABLED-NEXT: entry: 1832; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1833; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1834; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1835; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1836; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1837; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1838; NVPTX-DISABLED: is_worker_check: 1839; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1840; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1841; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1842; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1843; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1844; NVPTX-DISABLED: worker_state_machine.begin: 1845; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1846; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1847; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1848; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1849; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1850; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1851; NVPTX-DISABLED: worker_state_machine.finished: 1852; NVPTX-DISABLED-NEXT: ret void 1853; NVPTX-DISABLED: worker_state_machine.is_active.check: 1854; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1855; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1856; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1857; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1858; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 1859; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1860; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1861; NVPTX-DISABLED: worker_state_machine.done.barrier: 1862; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1863; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1864; NVPTX-DISABLED: thread.user_code.check: 1865; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1866; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1867; NVPTX-DISABLED: common.ret: 1868; NVPTX-DISABLED-NEXT: ret void 1869; NVPTX-DISABLED: user_code.entry: 1870; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1871; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 1872; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1873; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 1874; 1875entry: 1876 %.zero.addr = alloca i32, align 4 1877 %.threadid_temp. = alloca i32, align 4 1878 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 1879 %exec_user_code = icmp eq i32 %0, -1 1880 br i1 %exec_user_code, label %user_code.entry, label %common.ret 1881 1882common.ret: ; preds = %entry, %user_code.entry 1883 ret void 1884 1885user_code.entry: ; preds = %entry 1886 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 1887 store i32 0, i32* %.zero.addr, align 4 1888 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 1889 call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #6 1890 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 1891 br label %common.ret 1892} 1893 1894; Function Attrs: alwaysinline convergent norecurse nounwind 1895define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 1896; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 1897; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1898; AMDGPU-NEXT: entry: 1899; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 1900; AMDGPU-NEXT: ret void 1901; 1902; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 1903; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1904; NVPTX-NEXT: entry: 1905; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 1906; NVPTX-NEXT: ret void 1907; 1908; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 1909; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1910; AMDGPU-DISABLED-NEXT: entry: 1911; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1912; AMDGPU-DISABLED-NEXT: ret void 1913; 1914; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 1915; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1916; NVPTX-DISABLED-NEXT: entry: 1917; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1918; NVPTX-DISABLED-NEXT: ret void 1919; 1920entry: 1921 call void @unknown() #11 1922 ret void 1923} 1924 1925; Function Attrs: alwaysinline convergent norecurse nounwind 1926define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { 1927; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 1928; AMDGPU-SAME: () #[[ATTR0]] { 1929; AMDGPU-NEXT: entry: 1930; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1931; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 1932; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1933; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1934; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1935; AMDGPU: is_worker_check: 1936; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1937; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1938; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1939; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1940; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1941; AMDGPU: worker_state_machine.begin: 1942; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1943; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1944; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1945; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1946; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1947; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1948; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1949; AMDGPU: worker_state_machine.finished: 1950; AMDGPU-NEXT: ret void 1951; AMDGPU: worker_state_machine.is_active.check: 1952; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1953; AMDGPU: worker_state_machine.parallel_region.check: 1954; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 1955; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1956; AMDGPU: worker_state_machine.parallel_region.execute: 1957; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 1958; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1959; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 1960; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1961; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1962; AMDGPU: worker_state_machine.parallel_region.end: 1963; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1964; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1965; AMDGPU: worker_state_machine.done.barrier: 1966; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1967; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1968; AMDGPU: thread.user_code.check: 1969; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1970; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1971; AMDGPU: common.ret: 1972; AMDGPU-NEXT: ret void 1973; AMDGPU: user_code.entry: 1974; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1975; AMDGPU-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] 1976; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* 1977; AMDGPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 1978; AMDGPU-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1979; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) 1980; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1981; AMDGPU-NEXT: br label [[COMMON_RET]] 1982; 1983; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 1984; NVPTX-SAME: () #[[ATTR0]] { 1985; NVPTX-NEXT: entry: 1986; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1987; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 1988; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1989; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1990; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1991; NVPTX: is_worker_check: 1992; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1993; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1994; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1995; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1996; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1997; NVPTX: worker_state_machine.begin: 1998; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1999; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 2000; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 2001; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 2002; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2003; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2004; NVPTX: worker_state_machine.finished: 2005; NVPTX-NEXT: ret void 2006; NVPTX: worker_state_machine.is_active.check: 2007; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2008; NVPTX: worker_state_machine.parallel_region.check: 2009; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2010; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2011; NVPTX: worker_state_machine.parallel_region.execute: 2012; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2013; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2014; NVPTX: worker_state_machine.parallel_region.fallback.execute: 2015; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2016; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2017; NVPTX: worker_state_machine.parallel_region.end: 2018; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2019; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2020; NVPTX: worker_state_machine.done.barrier: 2021; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2022; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2023; NVPTX: thread.user_code.check: 2024; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2025; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2026; NVPTX: common.ret: 2027; NVPTX-NEXT: ret void 2028; NVPTX: user_code.entry: 2029; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 2030; NVPTX-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] 2031; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* 2032; NVPTX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 2033; NVPTX-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2034; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) 2035; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2036; NVPTX-NEXT: br label [[COMMON_RET]] 2037; 2038; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2039; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 2040; AMDGPU-DISABLED-NEXT: entry: 2041; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 2042; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 2043; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 2044; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2045; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2046; AMDGPU-DISABLED: is_worker_check: 2047; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2048; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2049; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2050; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2051; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2052; AMDGPU-DISABLED: worker_state_machine.begin: 2053; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2054; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 2055; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 2056; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2057; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 2058; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2059; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2060; AMDGPU-DISABLED: worker_state_machine.finished: 2061; AMDGPU-DISABLED-NEXT: ret void 2062; AMDGPU-DISABLED: worker_state_machine.is_active.check: 2063; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2064; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 2065; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2066; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2067; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 2068; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2069; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2070; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 2071; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2072; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2073; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 2074; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 2075; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2076; AMDGPU-DISABLED: worker_state_machine.done.barrier: 2077; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2078; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2079; AMDGPU-DISABLED: thread.user_code.check: 2080; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2081; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2082; AMDGPU-DISABLED: common.ret: 2083; AMDGPU-DISABLED-NEXT: ret void 2084; AMDGPU-DISABLED: user_code.entry: 2085; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 2086; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] 2087; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* 2088; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 2089; AMDGPU-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2090; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) 2091; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2092; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 2093; 2094; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2095; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 2096; NVPTX-DISABLED-NEXT: entry: 2097; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 2098; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 2099; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 2100; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2101; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2102; NVPTX-DISABLED: is_worker_check: 2103; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2104; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2105; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2106; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2107; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2108; NVPTX-DISABLED: worker_state_machine.begin: 2109; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2110; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 2111; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 2112; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 2113; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2114; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2115; NVPTX-DISABLED: worker_state_machine.finished: 2116; NVPTX-DISABLED-NEXT: ret void 2117; NVPTX-DISABLED: worker_state_machine.is_active.check: 2118; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2119; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 2120; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2121; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2122; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 2123; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2124; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2125; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 2126; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2127; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2128; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 2129; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 2130; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2131; NVPTX-DISABLED: worker_state_machine.done.barrier: 2132; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2133; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2134; NVPTX-DISABLED: thread.user_code.check: 2135; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2136; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2137; NVPTX-DISABLED: common.ret: 2138; NVPTX-DISABLED-NEXT: ret void 2139; NVPTX-DISABLED: user_code.entry: 2140; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 2141; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] 2142; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* 2143; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 2144; NVPTX-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2145; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) 2146; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2147; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 2148; 2149entry: 2150 %captured_vars_addrs = alloca [0 x i8*], align 8 2151 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 2152 %exec_user_code = icmp eq i32 %0, -1 2153 br i1 %exec_user_code, label %user_code.entry, label %common.ret 2154 2155common.ret: ; preds = %entry, %user_code.entry 2156 ret void 2157 2158user_code.entry: ; preds = %entry 2159 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 2160 %2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 %1, i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) 2161 %3 = bitcast i8* %2 to %struct.kmp_task_t_with_privates* 2162 %4 = call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 %1, i8* %2) 2163 %5 = bitcast [0 x i8*]* %captured_vars_addrs to i8** 2164 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__9_wrapper to i8*), i8** %5, i64 0) 2165 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 2166 br label %common.ret 2167} 2168 2169; Function Attrs: alwaysinline convergent nounwind 2170define internal void @.omp_outlined.(i32 %.global_tid., i32* noalias %.part_id., i8* noalias %.privates., void (i8*, ...)* noalias %.copy_fn., i8* %.task_t., %struct.anon* noalias %__context) #9 { 2171; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. 2172; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { 2173; AMDGPU-NEXT: entry: 2174; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 2175; AMDGPU-NEXT: ret void 2176; 2177; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. 2178; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { 2179; NVPTX-NEXT: entry: 2180; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 2181; NVPTX-NEXT: ret void 2182; 2183; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. 2184; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { 2185; AMDGPU-DISABLED-NEXT: entry: 2186; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 2187; AMDGPU-DISABLED-NEXT: ret void 2188; 2189; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. 2190; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { 2191; NVPTX-DISABLED-NEXT: entry: 2192; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 2193; NVPTX-DISABLED-NEXT: ret void 2194; 2195entry: 2196 call void @spmd_amenable() #10 2197 ret void 2198} 2199 2200; Function Attrs: convergent norecurse nounwind 2201define internal i32 @"_omp_task_entry$"(i32 %0, %struct.kmp_task_t_with_privates* noalias %1) #3 { 2202entry: 2203 %2 = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* %1, i32 0, i32 0 2204 %3 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 2 2205 %4 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 0 2206 %5 = load i8*, i8** %4, align 8, !tbaa !30 2207 %6 = bitcast i8* %5 to %struct.anon* 2208 %7 = bitcast %struct.kmp_task_t_with_privates* %1 to i8* 2209 call void @.omp_outlined.(i32 %0, i32* %3, i8* null, void (i8*, ...)* null, i8* %7, %struct.anon* %6) #6 2210 ret i32 0 2211} 2212 2213; Function Attrs: nounwind 2214declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #6 2215 2216; Function Attrs: nounwind 2217declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #6 2218 2219; Function Attrs: nosync nounwind 2220declare void @__kmpc_free_shared(i8* nocapture, i64) #8 2221 2222; Function Attrs: nofree nosync nounwind 2223declare i8* @__kmpc_alloc_shared(i64) #7 2224 2225; Function Attrs: convergent 2226declare void @use(i32* nocapture) #5 2227 2228; Function Attrs: convergent 2229declare void @unknown() #2 2230 2231; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn 2232declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 2233 2234declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) 2235 2236declare void @__kmpc_get_shared_variables(i8***) 2237 2238; Function Attrs: alwaysinline 2239declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) #4 2240 2241; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn 2242declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 2243 2244; Function Attrs: convergent 2245declare void @spmd_amenable() #5 2246 2247; Function Attrs: nounwind 2248declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #6 2249 2250declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) 2251 2252 2253; Function Attrs: alwaysinline convergent norecurse nounwind 2254define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 2255; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 2256; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2257; AMDGPU-NEXT: entry: 2258; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 2259; AMDGPU-NEXT: ret void 2260; 2261; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 2262; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2263; NVPTX-NEXT: entry: 2264; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 2265; NVPTX-NEXT: ret void 2266; 2267; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 2268; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2269; AMDGPU-DISABLED-NEXT: entry: 2270; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 2271; AMDGPU-DISABLED-NEXT: ret void 2272; 2273; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 2274; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2275; NVPTX-DISABLED-NEXT: entry: 2276; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 2277; NVPTX-DISABLED-NEXT: ret void 2278; 2279entry: 2280 call void @unknown() #11 2281 ret void 2282} 2283 2284; Function Attrs: convergent norecurse nounwind 2285define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { 2286; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 2287; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 2288; AMDGPU-NEXT: entry: 2289; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2290; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2291; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 2292; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 2293; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 2294; AMDGPU-NEXT: ret void 2295; 2296; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 2297; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 2298; NVPTX-NEXT: entry: 2299; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2300; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2301; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 2302; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 2303; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 2304; NVPTX-NEXT: ret void 2305; 2306; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 2307; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 2308; AMDGPU-DISABLED-NEXT: entry: 2309; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2310; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2311; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 2312; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 2313; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 2314; AMDGPU-DISABLED-NEXT: ret void 2315; 2316; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper 2317; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 2318; NVPTX-DISABLED-NEXT: entry: 2319; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2320; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2321; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 2322; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 2323; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 2324; NVPTX-DISABLED-NEXT: ret void 2325; 2326entry: 2327 %.addr1 = alloca i32, align 4 2328 %.zero.addr = alloca i32, align 4 2329 %global_args = alloca i8**, align 8 2330 store i32 %1, i32* %.addr1, align 4, !tbaa !18 2331 store i32 0, i32* %.zero.addr, align 4 2332 call void @__kmpc_get_shared_variables(i8*** %global_args) 2333 call void @__omp_outlined__9(i32* %.addr1, i32* %.zero.addr) #6 2334 ret void 2335} 2336 2337declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block(); 2338 2339attributes #0 = { alwaysinline convergent norecurse nounwind } 2340attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn } 2341attributes #2 = { convergent } 2342attributes #3 = { convergent norecurse nounwind } 2343attributes #4 = { alwaysinline } 2344attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" } 2345attributes #6 = { nounwind } 2346attributes #7 = { nofree nosync nounwind } 2347attributes #8 = { nosync nounwind } 2348attributes #9 = { alwaysinline convergent nounwind } 2349attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" } 2350attributes #11 = { convergent } 2351 2352!omp_offload.info = !{!0, !1, !2, !3, !4, !5} 2353!nvvm.annotations = !{!6, !7, !8, !9, !10, !11} 2354!llvm.module.flags = !{!12, !13, !14, !15, !16} 2355!llvm.ident = !{!17} 2356 2357!0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} 2358!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} 2359!2 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} 2360!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} 2361!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} 2362!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} 2363!6 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} 2364!7 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} 2365!8 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} 2366!9 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} 2367!10 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} 2368!11 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} 2369!12 = !{i32 1, !"wchar_size", i32 4} 2370!13 = !{i32 7, !"openmp", i32 50} 2371!14 = !{i32 7, !"openmp-device", i32 50} 2372!15 = !{i32 7, !"PIC Level", i32 2} 2373!16 = !{i32 7, !"frame-pointer", i32 2} 2374!17 = !{!"clang version 14.0.0"} 2375!18 = !{!19, !19, i64 0} 2376!19 = !{!"int", !20, i64 0} 2377!20 = !{!"omnipotent char", !21, i64 0} 2378!21 = !{!"Simple C/C++ TBAA"} 2379!22 = distinct !{!22, !23, !24} 2380!23 = !{!"llvm.loop.mustprogress"} 2381!24 = !{!"llvm.loop.unroll.disable"} 2382!25 = distinct !{!25, !23, !24} 2383!26 = !{!27, !27, i64 0} 2384!27 = !{!"any pointer", !20, i64 0} 2385!28 = distinct !{!28, !23, !24} 2386!29 = distinct !{!29, !23, !24} 2387!30 = !{!31, !27, i64 0} 2388!31 = !{!"kmp_task_t_with_privates", !32, i64 0} 2389!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} 2390;. 2391; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } 2392; AMDGPU: attributes #[[ATTR1]] = { convergent norecurse nounwind } 2393; AMDGPU: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } 2394; AMDGPU: attributes #[[ATTR3]] = { nounwind } 2395; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } 2396; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } 2397; AMDGPU: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } 2398; AMDGPU: attributes #[[ATTR7]] = { convergent } 2399; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } 2400; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } 2401; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } 2402;. 2403; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } 2404; NVPTX: attributes #[[ATTR1]] = { convergent norecurse nounwind } 2405; NVPTX: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } 2406; NVPTX: attributes #[[ATTR3]] = { nounwind } 2407; NVPTX: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } 2408; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } 2409; NVPTX: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } 2410; NVPTX: attributes #[[ATTR7]] = { convergent } 2411; NVPTX: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } 2412; NVPTX: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } 2413; NVPTX: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } 2414;. 2415; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } 2416; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent norecurse nounwind } 2417; AMDGPU-DISABLED: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } 2418; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind } 2419; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } 2420; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } 2421; AMDGPU-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } 2422; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent } 2423; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } 2424; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } 2425; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } 2426;. 2427; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } 2428; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent norecurse nounwind } 2429; NVPTX-DISABLED: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } 2430; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind } 2431; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } 2432; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } 2433; NVPTX-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } 2434; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent } 2435; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } 2436; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } 2437; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } 2438;. 2439; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} 2440; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} 2441; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} 2442; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} 2443; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} 2444; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} 2445; AMDGPU: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} 2446; AMDGPU: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} 2447; AMDGPU: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} 2448; AMDGPU: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} 2449; AMDGPU: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} 2450; AMDGPU: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} 2451; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 2452; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} 2453; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 2454; AMDGPU: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2} 2455; AMDGPU: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} 2456; AMDGPU: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} 2457; AMDGPU: [[TBAA18]] = !{!19, !19, i64 0} 2458; AMDGPU: [[META19:![0-9]+]] = !{!"int", !20, i64 0} 2459; AMDGPU: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} 2460; AMDGPU: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} 2461; AMDGPU: [[LOOP22]] = distinct !{!22, !23, !24} 2462; AMDGPU: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} 2463; AMDGPU: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} 2464; AMDGPU: [[LOOP25]] = distinct !{!25, !23, !24} 2465; AMDGPU: [[TBAA26]] = !{!27, !27, i64 0} 2466; AMDGPU: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} 2467; AMDGPU: [[LOOP28]] = distinct !{!28, !23, !24} 2468; AMDGPU: [[LOOP29]] = distinct !{!29, !23, !24} 2469; AMDGPU: [[META30:![0-9]+]] = !{!31, !27, i64 0} 2470; AMDGPU: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0} 2471; AMDGPU: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} 2472;. 2473; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} 2474; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} 2475; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} 2476; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} 2477; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} 2478; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} 2479; NVPTX: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} 2480; NVPTX: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} 2481; NVPTX: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} 2482; NVPTX: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} 2483; NVPTX: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} 2484; NVPTX: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} 2485; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 2486; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} 2487; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 2488; NVPTX: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2} 2489; NVPTX: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} 2490; NVPTX: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} 2491; NVPTX: [[TBAA18]] = !{!19, !19, i64 0} 2492; NVPTX: [[META19:![0-9]+]] = !{!"int", !20, i64 0} 2493; NVPTX: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} 2494; NVPTX: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} 2495; NVPTX: [[LOOP22]] = distinct !{!22, !23, !24} 2496; NVPTX: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} 2497; NVPTX: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} 2498; NVPTX: [[LOOP25]] = distinct !{!25, !23, !24} 2499; NVPTX: [[TBAA26]] = !{!27, !27, i64 0} 2500; NVPTX: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} 2501; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24} 2502; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24} 2503; NVPTX: [[META30:![0-9]+]] = !{!31, !27, i64 0} 2504; NVPTX: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0} 2505; NVPTX: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} 2506;. 2507; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} 2508; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} 2509; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} 2510; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} 2511; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} 2512; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} 2513; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} 2514; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} 2515; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} 2516; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} 2517; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} 2518; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} 2519; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 2520; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} 2521; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 2522; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2} 2523; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} 2524; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} 2525; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} 2526; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} 2527; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} 2528; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} 2529; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} 2530; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} 2531; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} 2532; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} 2533; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} 2534; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} 2535; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} 2536; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} 2537; AMDGPU-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0} 2538; AMDGPU-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0} 2539; AMDGPU-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} 2540;. 2541; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} 2542; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} 2543; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0} 2544; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2} 2545; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4} 2546; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3} 2547; NVPTX-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1} 2548; NVPTX-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1} 2549; NVPTX-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1} 2550; NVPTX-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1} 2551; NVPTX-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1} 2552; NVPTX-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1} 2553; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} 2554; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50} 2555; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} 2556; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2} 2557; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} 2558; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"} 2559; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0} 2560; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0} 2561; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0} 2562; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"} 2563; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24} 2564; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"} 2565; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"} 2566; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24} 2567; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0} 2568; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0} 2569; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24} 2570; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24} 2571; NVPTX-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0} 2572; NVPTX-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0} 2573; NVPTX-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32} 2574;. 2575