; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED

;; void unknown(void);
;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable")));
;;
;; void sequential_loop() {
;;   #pragma omp target teams
;;   {
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void use(__attribute__((noescape)) int *) __attribute__((assume("ompx_spmd_amenable")));
;;
;; void sequential_loop_to_stack_var() {
;;   #pragma omp target teams
;;   {
;;     int x;
;;     use(&x);
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void sequential_loop_to_shared_var() {
;;   #pragma omp target teams
;;   {
;;     int x;
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         x++;
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void sequential_loop_to_shared_var_guarded() {
;;   #pragma omp target teams
;;   {
;;     int x = 42;
;;     for (int i = 0; i < 100; ++i) {
;;       #pragma omp parallel
;;       {
;;         x++;
;;         unknown();
;;       }
;;     }
;;     spmd_amenable();
;;   }
;; }
;;
;; void do_not_spmdize_target() {
;;   #pragma omp target teams
;;   {
;;     // Incompatible parallel level, called both
;;     // from parallel and target regions
;;     unknown();
;;   }
;; }
;;
;; void do_not_spmdize_task() {
;;   #pragma omp target
;;   {
;;     #pragma omp task
;;     spmd_amenable();
;;     #pragma omp parallel
;;     unknown();
;;   }
;; }

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.kmp_task_t_with_privates = type { %struct.kmp_task_t }
%struct.kmp_task_t = type { i8*, i32 (i32, i8*)*, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t }
%union.kmp_cmplrdata_t = type { i32 (i32, i8*)* }
%struct.anon = type {}

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode = weak constant i8 1
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1
@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1
@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode = weak constant i8 1
@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode = weak constant i8 1
@llvm.compiler.used = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"

; Function Attrs: alwaysinline convergent norecurse nounwind
;.
; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; AMDGPU: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; AMDGPU: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
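; The AMDGPU check lines above capture the intended effect of SPMDization on
; the exec-mode globals: the four amenable kernels are expected to be
; rewritten from i8 1 (generic) to i8 3 (a generic kernel executed in SPMD
; mode), while the two do_not_spmdize kernels stay at i8 1.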
; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; NVPTX: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; NVPTX: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
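; The NVPTX checks mirror the AMDGPU ones. In the function bodies below the
; visible difference is that NVPTX keeps its allocas in the generic address
; space, while AMDGPU allocates them in addrspace(5) and addrspacecasts the
; pointer back to generic before use.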
; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
; AMDGPU-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; AMDGPU-DISABLED: @[[X_SHARED_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
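; With -openmp-opt-disable-spmdization every kernel keeps exec mode i8 1
; (generic). The @__omp_outlined__N_wrapper.ID constants are dummy globals
; whose addresses act as stable identifiers: the worker state machines below
; compare the incoming work function pointer against them so that known
; parallel regions can be called directly rather than indirectly.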
; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
; NVPTX-DISABLED: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; NVPTX-DISABLED: @[[X_SHARED1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; NVPTX-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
; NVPTX-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
;.
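; In the disabled runs the kernels below keep the generic-mode prologue:
; __kmpc_target_init returns -1 for the main thread, which then runs the
; user code, while worker threads spin in the worker_state_machine.* blocks
; waiting for __kmpc_kernel_parallel to hand them a work function.
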
define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; AMDGPU-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5
; NVPTX-DISABLED-SAME: () #[[ATTR0:[0-9]+]] {
; NVPTX-DISABLED-NEXT: call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED-NEXT: ret void
;
  call void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
  ret void
}

define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED: worker_state_machine.begin:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED: worker_state_machine.finished:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker_state_machine.is_active.check:
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED: worker_state_machine.done.barrier:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED: thread.user_code.check:
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED: common.ret:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED: worker_state_machine.begin:
; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED: worker_state_machine.finished:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker_state_machine.is_active.check:
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED: worker_state_machine.done.barrier:
; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED: thread.user_code.check:
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED: common.ret:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret: ; preds = %entry, %user_code.entry
  ret void

user_code.entry: ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
  store i32 0, i32* %.zero.addr, align 4
  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #6
  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
  br label %common.ret
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED: for.cond:
; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED: for.cond.cleanup:
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: for.body:
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED: for.cond:
; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED: for.cond.cleanup:
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: for.body:
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
;
entry:
  %captured_vars_addrs = alloca [0 x i8*], align 8
  br label %for.cond

for.cond: ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond
  call void @spmd_amenable() #10
  ret void

for.body: ; preds = %for.cond
  %0 = load i32, i32* %.global_tid., align 4, !tbaa !18
  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %1, i64 0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !22
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  call void @unknown() #11
  ret void
}

; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 %1, i32* %.addr1, align 4, !tbaa !18
  store i32 0, i32* %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) #6
  ret void
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED: worker_state_machine.begin:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED: worker_state_machine.finished:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker_state_machine.is_active.check:
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*)
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED: worker_state_machine.done.barrier:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED: thread.user_code.check:
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED: common.ret:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED: worker_state_machine.begin:
; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED: worker_state_machine.finished:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker_state_machine.is_active.check:
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*)
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED: worker_state_machine.done.barrier:
; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED: thread.user_code.check:
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED: common.ret:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret: ; preds = %entry, %user_code.entry
  ret void

user_code.entry: ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
  store i32 0, i32* %.zero.addr, align 4
  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #6
  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
  br label %common.ret
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8*
; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32*
; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED: for.cond:
; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED: for.cond.cleanup:
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: for.body:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32*
; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]]
; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED: for.cond:
; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED: for.cond.cleanup:
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: for.body:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
;
entry:
  %captured_vars_addrs = alloca [0 x i8*], align 8
  %x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
  %x_on_stack = bitcast i8* %x to i32*
  call void @use(i32* nocapture %x_on_stack) #10
  br label %for.cond

for.cond: ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond
  call void @spmd_amenable() #10
  call void @__kmpc_free_shared(i8* %x, i64 4)
  ret void

for.body: ; preds = %for.cond
  %0 = load i32, i32* %.global_tid., align 4, !tbaa !18
  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !25
}
; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  call void @unknown() #11
  ret void
}

; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 %1, i32* %.addr1, align 4, !tbaa !18
  store i32 0, i32* %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #6
  ret void
}


; Function Attrs: alwaysinline convergent norecurse nounwind
define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED: worker_state_machine.begin:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED: worker_state_machine.finished:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker_state_machine.is_active.check:
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*)
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED: worker_state_machine.done.barrier:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
@__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 978; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 979; AMDGPU-DISABLED: thread.user_code.check: 980; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 981; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 982; AMDGPU-DISABLED: common.ret: 983; AMDGPU-DISABLED-NEXT: ret void 984; AMDGPU-DISABLED: user_code.entry: 985; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] 986; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 987; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] 988; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 989; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 990; 991; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 992; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 993; NVPTX-DISABLED-NEXT: entry: 994; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 995; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 996; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 997; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 998; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 999; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1000; NVPTX-DISABLED: is_worker_check: 1001; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1002; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1003; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1004; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1005; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1006; NVPTX-DISABLED: worker_state_machine.begin: 1007; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1008; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1009; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1010; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1011; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1012; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1013; NVPTX-DISABLED: worker_state_machine.finished: 1014; NVPTX-DISABLED-NEXT: ret void 1015; NVPTX-DISABLED: worker_state_machine.is_active.check: 1016; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1017; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 1018; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) 
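; (note: with SPMDization disabled the kernel stays in generic mode, so each
; worker compares the incoming work function against the known
; @__omp_outlined__5_wrapper.ID and only falls back to an indirect call for
; unknown parallel regions)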
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED: worker_state_machine.done.barrier:
; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED: thread.user_code.check:
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED: common.ret:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret: ; preds = %entry, %user_code.entry
  ret void

user_code.entry: ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
  store i32 0, i32* %.zero.addr, align 4
  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #6
  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
  br label %common.ret
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED: for.cond:
; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED: for.cond.cleanup:
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: for.body:
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED: for.cond:
; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED: for.cond.cleanup:
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: for.body:
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
;
entry:
  %captured_vars_addrs = alloca [1 x i8*], align 8
  %x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
  %x_on_stack = bitcast i8* %x to i32*
  br label %for.cond

for.cond: ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

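; %x is handed to the parallel region below, so it cannot stay in a private
; allocation; the check lines above expect the __kmpc_alloc_shared call to be
; folded into the @x_shared addrspace(3) global.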
for.cond.cleanup: ; preds = %for.cond
  call void @spmd_amenable() #10
  call void @__kmpc_free_shared(i8* %x, i64 4)
  ret void

for.body: ; preds = %for.cond
  %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0
  store i8* %x, i8** %0, align 8, !tbaa !26
  %1 = load i32, i32* %.global_tid., align 4, !tbaa !18
  %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 1)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !28
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  %0 = load i32, i32* %x, align 4, !tbaa !18
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %x, align 4, !tbaa !18
  call void @unknown() #11
  ret void
}

; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 %1, i32* %.addr1, align 4, !tbaa !18
  store i32 0, i32* %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  %2 = load i8**, i8*** %global_args, align 8
  %3 = bitcast i8** %2 to i32**
  %4 = load i32*, i32** %3, align 8, !tbaa !26
  call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr, i32* %4) #6
  ret void
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false)
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU-DISABLED: is_worker_check:
; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU-DISABLED: worker_state_machine.begin:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU-DISABLED: worker_state_machine.finished:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: worker_state_machine.is_active.check:
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.check:
; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute:
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; AMDGPU-DISABLED: worker_state_machine.parallel_region.end:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU-DISABLED: worker_state_machine.done.barrier:
; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU-DISABLED: thread.user_code.check:
; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU-DISABLED: common.ret:
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: user_code.entry:
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX-DISABLED: is_worker_check:
; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED: worker_state_machine.begin:
; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED: worker_state_machine.finished:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: worker_state_machine.is_active.check:
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.check:
; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*)
; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.execute:
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
; NVPTX-DISABLED: worker_state_machine.parallel_region.end:
; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX-DISABLED: worker_state_machine.done.barrier:
; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX-DISABLED: thread.user_code.check:
; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX-DISABLED: common.ret:
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: user_code.entry:
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret: ; preds = %entry, %user_code.entry
  ret void

user_code.entry: ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
  store i32 0, i32* %.zero.addr, align 4
  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #6
  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
  br label %common.ret
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]]
; AMDGPU: region.check.tid:
; AMDGPU-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; AMDGPU: region.guarded:
; AMDGPU-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]]
; AMDGPU: region.guarded.end:
; AMDGPU-NEXT: br label [[REGION_BARRIER]]
; AMDGPU: region.barrier:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]]
; AMDGPU: region.exit:
; AMDGPU-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU: for.cond:
; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU: for.cond.cleanup:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
; AMDGPU: for.body:
; AMDGPU-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1)
; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]]
; NVPTX: region.check.tid:
; NVPTX-NEXT: [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; NVPTX: region.guarded:
; NVPTX-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]]
; NVPTX: region.guarded.end:
; NVPTX-NEXT: br label [[REGION_BARRIER]]
; NVPTX: region.barrier:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
; NVPTX-NEXT: br label [[REGION_EXIT:%.*]]
; NVPTX: region.exit:
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX: for.cond.cleanup:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
; NVPTX: for.body:
; NVPTX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
; NVPTX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1)
; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
; AMDGPU-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; AMDGPU-DISABLED: for.cond:
; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; AMDGPU-DISABLED: for.cond.cleanup:
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED-NEXT: ret void
; AMDGPU-DISABLED: for.body:
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1)
; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
; NVPTX-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED: for.cond:
; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
; NVPTX-DISABLED: for.cond.cleanup:
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED-NEXT: ret void
; NVPTX-DISABLED: for.body:
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1)
; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
;
entry:
  %captured_vars_addrs = alloca [1 x i8*], align 8
  %x = call align 4 i8* @__kmpc_alloc_shared(i64 4)
  %x_on_stack = bitcast i8* %x to i32*
  store i32 42, i32* %x_on_stack, align 4, !tbaa !18
  br label %for.cond

for.cond: ; preds = %for.body, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, 100
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond
  call void @spmd_amenable() #10
  call void @__kmpc_free_shared(i8* %x, i64 4)
  ret void

for.body: ; preds = %for.cond
  %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0
  store i8* %x, i8** %0, align 8, !tbaa !26
  %1 = load i32, i32* %.global_tid., align 4, !tbaa !18
  %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 1)
  %inc = add nsw i32 %i.0, 1
  br label %for.cond, !llvm.loop !29
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; AMDGPU-DISABLED-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]]
; NVPTX-DISABLED-NEXT: call void @unknowni32p(i32* [[X]]) #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  %0 = load i32, i32* %x, align 4, !tbaa !18
  %inc = add nsw i32 %0, 1
  store i32 %inc, i32* %x, align 4, !tbaa !18
  call void @unknowni32p(i32* %x) #11
  ret void
}

; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
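; (note: each wrapper recovers the captured pointer to x through
; __kmpc_get_shared_variables before forwarding it to @__omp_outlined__7)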
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32**
; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]]
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 %1, i32* %.addr1, align 4, !tbaa !18
  store i32 0, i32* %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  %2 = load i8**, i8*** %global_args, align 8
  %3 = bitcast i8** %2 to i32**
  %4 = load i32*, i32** %3, align 8, !tbaa !26
  call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr, i32* %4) #6
  ret void
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8**
; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]])
; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; AMDGPU: worker_state_machine.finished:
; AMDGPU-NEXT: ret void
; AMDGPU: worker_state_machine.is_active.check:
; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; AMDGPU: worker_state_machine.parallel_region.fallback.execute:
; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; AMDGPU: worker_state_machine.parallel_region.end:
; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel()
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; AMDGPU: worker_state_machine.done.barrier:
; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; AMDGPU: thread.user_code.check:
; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; AMDGPU: common.ret:
; AMDGPU-NEXT: ret void
; AMDGPU: user_code.entry:
; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; AMDGPU-NEXT: br label [[COMMON_RET]]
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; NVPTX: is_worker_check:
; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
; NVPTX-NEXT: ret void
; NVPTX: worker_state_machine.is_active.check:
; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
; NVPTX: worker_state_machine.parallel_region.fallback.execute:
; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
; NVPTX: worker_state_machine.parallel_region.end:
; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel()
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
; NVPTX: worker_state_machine.done.barrier:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]]
; NVPTX: thread.user_code.check:
; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
; NVPTX: common.ret:
; NVPTX-NEXT: ret void
; NVPTX: user_code.entry:
; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-NEXT: br label [[COMMON_RET]]
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-DISABLED-NEXT:
[[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1815; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1816; AMDGPU-DISABLED: is_worker_check: 1817; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1818; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1819; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1820; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1821; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1822; AMDGPU-DISABLED: worker_state_machine.begin: 1823; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1824; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1825; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1826; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1827; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1828; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1829; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1830; AMDGPU-DISABLED: worker_state_machine.finished: 1831; AMDGPU-DISABLED-NEXT: ret void 1832; AMDGPU-DISABLED: worker_state_machine.is_active.check: 1833; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1834; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1835; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1836; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1837; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 1838; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1839; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1840; AMDGPU-DISABLED: worker_state_machine.done.barrier: 1841; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1842; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1843; AMDGPU-DISABLED: thread.user_code.check: 1844; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1845; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1846; AMDGPU-DISABLED: common.ret: 1847; AMDGPU-DISABLED-NEXT: ret void 1848; AMDGPU-DISABLED: user_code.entry: 1849; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] 1850; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] 1851; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1852; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 1853; 1854; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1855; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 1856; 
NVPTX-DISABLED-NEXT: entry: 1857; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1858; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1859; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1860; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1861; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1862; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1863; NVPTX-DISABLED: is_worker_check: 1864; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1865; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1866; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1867; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1868; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1869; NVPTX-DISABLED: worker_state_machine.begin: 1870; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1871; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1872; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1873; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1874; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1875; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1876; NVPTX-DISABLED: worker_state_machine.finished: 1877; NVPTX-DISABLED-NEXT: ret void 1878; NVPTX-DISABLED: worker_state_machine.is_active.check: 1879; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1880; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1881; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1882; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1883; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 1884; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1885; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1886; NVPTX-DISABLED: worker_state_machine.done.barrier: 1887; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1888; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1889; NVPTX-DISABLED: thread.user_code.check: 1890; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1891; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1892; NVPTX-DISABLED: common.ret: 1893; NVPTX-DISABLED-NEXT: ret void 1894; NVPTX-DISABLED: user_code.entry: 1895; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] 1896; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] 1897; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret: ; preds = %entry, %user_code.entry
  ret void

user_code.entry: ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
  store i32 0, i32* %.zero.addr, align 4
  store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18
  call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #6
  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
  br label %common.ret
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  call void @unknown() #11
  ret void
}

; Function Attrs: alwaysinline convergent norecurse nounwind
define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; AMDGPU-SAME: () #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5)
; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true)
; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
; AMDGPU: is_worker_check:
; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; AMDGPU: worker_state_machine.begin:
;
AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1968; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1969; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1970; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1971; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1972; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1973; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1974; AMDGPU: worker_state_machine.finished: 1975; AMDGPU-NEXT: ret void 1976; AMDGPU: worker_state_machine.is_active.check: 1977; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1978; AMDGPU: worker_state_machine.parallel_region.check: 1979; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 1980; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1981; AMDGPU: worker_state_machine.parallel_region.execute: 1982; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 1983; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1984; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 1985; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1986; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1987; AMDGPU: worker_state_machine.parallel_region.end: 1988; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1989; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1990; AMDGPU: worker_state_machine.done.barrier: 1991; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1992; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1993; AMDGPU: thread.user_code.check: 1994; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1995; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1996; AMDGPU: common.ret: 1997; AMDGPU-NEXT: ret void 1998; AMDGPU: user_code.entry: 1999; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] 2000; AMDGPU-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]] 2001; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* 2002; AMDGPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] 2003; AMDGPU-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2004; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) 2005; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* 
@[[GLOB1]], i8 1, i1 true) 2006; AMDGPU-NEXT: br label [[COMMON_RET]] 2007; 2008; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2009; NVPTX-SAME: () #[[ATTR0]] { 2010; NVPTX-NEXT: entry: 2011; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 2012; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 2013; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 2014; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2015; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2016; NVPTX: is_worker_check: 2017; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2018; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2019; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2020; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2021; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2022; NVPTX: worker_state_machine.begin: 2023; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2024; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 2025; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 2026; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 2027; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2028; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2029; NVPTX: worker_state_machine.finished: 2030; NVPTX-NEXT: ret void 2031; NVPTX: worker_state_machine.is_active.check: 2032; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2033; NVPTX: worker_state_machine.parallel_region.check: 2034; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2035; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2036; NVPTX: worker_state_machine.parallel_region.execute: 2037; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2038; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2039; NVPTX: worker_state_machine.parallel_region.fallback.execute: 2040; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2041; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2042; NVPTX: worker_state_machine.parallel_region.end: 2043; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2044; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2045; NVPTX: worker_state_machine.done.barrier: 2046; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2047; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2048; NVPTX: thread.user_code.check: 2049; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2050; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2051; 
NVPTX: common.ret: 2052; NVPTX-NEXT: ret void 2053; NVPTX: user_code.entry: 2054; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] 2055; NVPTX-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]] 2056; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* 2057; NVPTX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] 2058; NVPTX-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2059; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) 2060; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2061; NVPTX-NEXT: br label [[COMMON_RET]] 2062; 2063; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2064; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 2065; AMDGPU-DISABLED-NEXT: entry: 2066; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 2067; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 2068; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 2069; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2070; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2071; AMDGPU-DISABLED: is_worker_check: 2072; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2073; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2074; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2075; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2076; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2077; AMDGPU-DISABLED: worker_state_machine.begin: 2078; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2079; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 2080; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 2081; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2082; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 2083; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2084; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2085; AMDGPU-DISABLED: worker_state_machine.finished: 2086; AMDGPU-DISABLED-NEXT: ret void 2087; AMDGPU-DISABLED: worker_state_machine.is_active.check: 2088; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label 
[[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2089; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 2090; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2091; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2092; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 2093; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2094; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2095; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 2096; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2097; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2098; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 2099; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 2100; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2101; AMDGPU-DISABLED: worker_state_machine.done.barrier: 2102; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2103; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2104; AMDGPU-DISABLED: thread.user_code.check: 2105; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2106; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2107; AMDGPU-DISABLED: common.ret: 2108; AMDGPU-DISABLED-NEXT: ret void 2109; AMDGPU-DISABLED: user_code.entry: 2110; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] 2111; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]] 2112; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* 2113; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] 2114; AMDGPU-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2115; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) 2116; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2117; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 2118; 2119; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2120; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 2121; NVPTX-DISABLED-NEXT: entry: 2122; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 2123; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 2124; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 2125; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2126; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 
2127; NVPTX-DISABLED: is_worker_check: 2128; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2129; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2130; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2131; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2132; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2133; NVPTX-DISABLED: worker_state_machine.begin: 2134; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2135; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 2136; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 2137; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 2138; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2139; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2140; NVPTX-DISABLED: worker_state_machine.finished: 2141; NVPTX-DISABLED-NEXT: ret void 2142; NVPTX-DISABLED: worker_state_machine.is_active.check: 2143; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2144; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 2145; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2146; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2147; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 2148; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2149; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2150; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 2151; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2152; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2153; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 2154; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 2155; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2156; NVPTX-DISABLED: worker_state_machine.done.barrier: 2157; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2158; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2159; NVPTX-DISABLED: thread.user_code.check: 2160; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2161; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2162; NVPTX-DISABLED: common.ret: 2163; NVPTX-DISABLED-NEXT: ret void 2164; NVPTX-DISABLED: user_code.entry: 2165; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] 2166; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, 
i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates*
; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0)
; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]]
;
entry:
  %captured_vars_addrs = alloca [0 x i8*], align 8
  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %common.ret

common.ret: ; preds = %entry, %user_code.entry
  ret void

user_code.entry: ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
  %2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 %1, i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*))
  %3 = bitcast i8* %2 to %struct.kmp_task_t_with_privates*
  %4 = call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 %1, i8* %2)
  %5 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__9_wrapper to i8*), i8** %5, i64 0)
  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
  br label %common.ret
}

; Function Attrs: alwaysinline convergent nounwind
define internal void @.omp_outlined.(i32 %.global_tid., i32* noalias %.part_id., i8* noalias %.privates., void (i8*, ...)* noalias %.copy_fn., i8* %.task_t., %struct.anon* noalias %__context) #9 {
; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined.
; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined.
; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined.
; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  call void @spmd_amenable() #10
  ret void
}

; Function Attrs: convergent norecurse nounwind
define internal i32 @"_omp_task_entry$"(i32 %0, %struct.kmp_task_t_with_privates* noalias %1) #3 {
entry:
  %2 = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* %1, i32 0, i32 0
  %3 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 2
  %4 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 0
  %5 = load i8*, i8** %4, align 8, !tbaa !30
  %6 = bitcast i8* %5 to %struct.anon*
  %7 = bitcast %struct.kmp_task_t_with_privates* %1 to i8*
  call void @.omp_outlined.(i32 %0, i32* %3, i8* null, void (i8*, ...)* null, i8* %7, %struct.anon* %6) #6
  ret i32 0
}

; Function Attrs: nounwind
declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #6

; Function Attrs: nounwind
declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #6

; Function Attrs: nosync nounwind
declare void @__kmpc_free_shared(i8* nocapture, i64) #8

; Function Attrs: nofree nosync nounwind
declare i8* @__kmpc_alloc_shared(i64) #7

; Function Attrs: convergent
declare void @use(i32* nocapture) #5

; Function Attrs: convergent
declare void @unknown() #2
declare void @unknowni32p(i32*) #2

; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

declare void @__kmpc_get_shared_variables(i8***)

; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) #4

; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1

; Function Attrs: convergent
declare void @spmd_amenable() #5

; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #6

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  call void @unknown() #11
  ret void
}

; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-NEXT: entry:
; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-NEXT: ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-NEXT: entry:
; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-NEXT: ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; AMDGPU-DISABLED-NEXT: entry:
; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; AMDGPU-DISABLED-NEXT: ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; NVPTX-DISABLED-NEXT: entry:
; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]]
; NVPTX-DISABLED-NEXT: ret void
;
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 %1, i32* %.addr1, align 4, !tbaa !18
  store i32 0, i32* %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @__omp_outlined__9(i32* %.addr1, i32* %.zero.addr) #6
  ret void
}

declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block();

attributes #0 = { alwaysinline convergent norecurse nounwind }
attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
attributes #2 = { convergent }
attributes #3 = { convergent norecurse nounwind }
attributes #4 = { alwaysinline }
attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" }
attributes #6 = { nounwind }
attributes #7 = { nofree nosync nounwind }
attributes #8 = { nosync nounwind }
attributes #9 = { alwaysinline convergent nounwind }
attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
attributes #11 = { convergent }

!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
!nvvm.annotations = !{!6, !7, !8, !9, !10, !11}
!llvm.module.flags = !{!12, !13, !14, !15, !16}
!llvm.ident = !{!17}

!0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
!2 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
!6 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
!7 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
!8 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
!9 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
!10 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
!11 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
!12 = !{i32 1, !"wchar_size", i32 4}
!13 = !{i32 7, !"openmp", i32 50}
!14 = !{i32 7, !"openmp-device", i32 50}
!15 = !{i32 7, !"PIC Level", i32 2}
!16 = !{i32 7, !"frame-pointer", i32 2}
!17 = !{!"clang version 14.0.0"}
!18 = !{!19, !19, i64 0}
!19 = !{!"int", !20, i64 0}
!20 = !{!"omnipotent char", !21, i64 0}
!21 = !{!"Simple C/C++ TBAA"}
!22 = distinct !{!22, !23, !24}
!23 = !{!"llvm.loop.mustprogress"}
!24 = !{!"llvm.loop.unroll.disable"}
!25 = distinct !{!25, !23, !24}
!26 = !{!27, !27, i64 0}
!27 = !{!"any pointer", !20, i64 0}
!28 = distinct !{!28, !23, !24}
!29 = distinct !{!29, !23, !24}
!30 = !{!31, !27, i64 0}
!31 = !{!"kmp_task_t_with_privates", !32, i64 0}
!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
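;; Note on the kernels kept in generic mode: the hand-written input above calls
;; @__kmpc_target_init(..., i8 1, i1 true, i1 true), i.e. it requests the
;; runtime's generic state machine, while for the non-SPMDized kernels the
;; openmp-opt pass emits a specialized inline state machine and flips that flag
;; to `i1 false`, as the CHECK lines verify. As an illustrative sketch only
;; (C-like pseudocode assuming the runtime calls behave as their names suggest,
;; not the actual device runtime source), each worker thread in those kernels
;; roughly executes:
;;
;;   void worker_loop(int tid) {
;;     void *work_fn;
;;     for (;;) {
;;       __kmpc_barrier_simple_generic(&loc, tid);        // wait for work
;;       int active = __kmpc_kernel_parallel(&work_fn);   // fetch parallel fn
;;       if (!work_fn)                                    // NULL: kernel done
;;         return;
;;       if (active) {
;;         // Known wrappers (e.g. @__omp_outlined__9_wrapper.ID) are called
;;         // directly; unknown work functions take the indirect fallback call.
;;         ((void (*)(short, int))work_fn)(0, tid);
;;         __kmpc_kernel_end_parallel();
;;       }
;;       __kmpc_barrier_simple_generic(&loc, tid);        // signal done
;;     }
;;   }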
; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; AMDGPU: attributes #[[ATTR1]] = { norecurse }
; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; AMDGPU: attributes #[[ATTR4]] = { nounwind }
; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU: attributes #[[ATTR8]] = { convergent }
; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn }
; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; NVPTX: attributes #[[ATTR1]] = { norecurse }
; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; NVPTX: attributes #[[ATTR4]] = { nounwind }
; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX: attributes #[[ATTR8]] = { convergent }
; NVPTX: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn }
; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse }
; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR4]] = { nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent }
; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn }
; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse }
; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind }
; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline convergent nounwind }
; NVPTX-DISABLED: attributes #[[ATTR4]] = { nounwind }
; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind }
; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) }
; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent }
; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn }
; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; AMDGPU: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; AMDGPU: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; AMDGPU: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; AMDGPU: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; AMDGPU: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; AMDGPU: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; AMDGPU: [[TBAA18]] = !{!19, !19, i64 0}
; AMDGPU: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; AMDGPU: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; AMDGPU: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; AMDGPU: [[LOOP22]] = distinct !{!22, !23, !24}
; AMDGPU: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; AMDGPU: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU: [[LOOP25]] = distinct !{!25, !23, !24}
; AMDGPU: [[TBAA26]] = !{!27, !27, i64 0}
; AMDGPU: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU: [[LOOP29]] = distinct !{!29, !23, !24}
; AMDGPU: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; AMDGPU: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; AMDGPU: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; NVPTX: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; NVPTX: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; NVPTX: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; NVPTX: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; NVPTX: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; NVPTX: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; NVPTX: [[TBAA18]] = !{!19, !19, i64 0}
; NVPTX: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; NVPTX: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; NVPTX: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; NVPTX: [[LOOP22]] = distinct !{!22, !23, !24}
; NVPTX: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; NVPTX: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; NVPTX: [[LOOP25]] = distinct !{!25, !23, !24}
; NVPTX: [[TBAA26]] = !{!27, !27, i64 0}
; NVPTX: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24}
; NVPTX: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; NVPTX: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; NVPTX: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
; AMDGPU-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; AMDGPU-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; AMDGPU-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; NVPTX-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; NVPTX-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; NVPTX-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; NVPTX-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; NVPTX-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
; NVPTX-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; NVPTX-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; NVPTX-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
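;; The TBAA tags checked above follow the usual !{base-type, access-type,
;; offset} access-tag form: [[TBAA18]] is a scalar "int" access and
;; [[TBAA26]] an "any pointer" access, both rooted at "Simple C/C++ TBAA",
;; while [[META30]] is a struct-path tag for the pointer field at offset 0
;; of kmp_task_t_with_privates. Each [[LOOP*]] node is a distinct,
;; self-referential loop ID carrying llvm.loop.mustprogress and
;; llvm.loop.unroll.disable.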