1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals 2; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU 3; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX 4; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED 5; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED 6 7;; void unknown(void); 8;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable"))); 9;; 10;; void sequential_loop() { 11;; #pragma omp target teams 12;; { 13;; for (int i = 0; i < 100; ++i) { 14;; #pragma omp parallel 15;; { 16;; unknown(); 17;; } 18;; } 19;; spmd_amenable(); 20;; } 21;; } 22;; 23;; void use(__attribute__((noescape)) int *) __attribute__((assume("ompx_spmd_amenable"))); 24;; 25;; void sequential_loop_to_stack_var() { 26;; #pragma omp target teams 27;; { 28;; int x; 29;; use(&x); 30;; for (int i = 0; i < 100; ++i) { 31;; #pragma omp parallel 32;; { 33;; unknown(); 34;; } 35;; } 36;; spmd_amenable(); 37;; } 38;; } 39;; 40;; void sequential_loop_to_shared_var() { 41;; #pragma omp target teams 42;; { 43;; int x; 44;; for (int i = 0; i < 100; ++i) { 45;; #pragma omp parallel 46;; { 47;; x++; 48;; unknown(); 49;; } 50;; } 51;; spmd_amenable(); 52;; } 53;; } 54;; 55;; void sequential_loop_to_shared_var_guarded() { 56;; #pragma omp target teams 57;; { 58;; int x = 42; 59;; for (int i = 0; i < 100; ++i) { 60;; #pragma omp parallel 61;; { 62;; x++; 63;; unknown(); 64;; } 65;; } 66;; spmd_amenable(); 67;; } 68;; } 69;; 70;; void do_not_spmdize_target() { 71;; #pragma omp target teams 72;; { 73;; // Incompatible parallel level, called both 74;; // from parallel and target regions 75;; unknown(); 76;; } 77;; } 78;; 
79;; void do_not_spmdize_task() { 80;; #pragma omp target 81;; { 82;; #pragma omp task 83;; spmd_amenable(); 84;; #pragma omp parallel 85;; unknown(); 86;; } 87;; } 88 89%struct.ident_t = type { i32, i32, i32, i32, i8* } 90%struct.kmp_task_t_with_privates = type { %struct.kmp_task_t } 91%struct.kmp_task_t = type { i8*, i32 (i32, i8*)*, i32, %union.kmp_cmplrdata_t, %union.kmp_cmplrdata_t } 92%union.kmp_cmplrdata_t = type { i32 (i32, i8*)* } 93%struct.anon = type {} 94 95@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 96@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 97@__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode = weak constant i8 1 98@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode = weak constant i8 1 99@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode = weak constant i8 1 100@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode = weak constant i8 1 101@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode = weak constant i8 1 102@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode = weak constant i8 1 103@llvm.compiler.used = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 104 105; Function Attrs: alwaysinline convergent norecurse nounwind 106;. 
107; AMDGPU: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 108; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 109; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 110; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 111; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 112; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 113; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 114; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 115; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 116; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 117; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 118; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) 
global [4 x i8] undef, align 4 119; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 120;. 121; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 122; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 123; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 124; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 125; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 126; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 127; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 128; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 129; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 130; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 131; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = 
internal addrspace(3) global [4 x i8] undef, align 4 132; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 133; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 134;. 135; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 136; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 137; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 138; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 139; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 140; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 141; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 142; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 143; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 144; AMDGPU-DISABLED: 
@[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 145; AMDGPU-DISABLED: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 146; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 147; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 148; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 149; AMDGPU-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 150; AMDGPU-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 151;. 152; NVPTX-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" 153; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 154; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 155; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 156; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 157; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 158; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 159; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 160; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* 
@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" 161; NVPTX-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 162; NVPTX-DISABLED: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 163; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 164; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 165; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 166; NVPTX-DISABLED: @[[__OMP_OUTLINED__7_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 167; NVPTX-DISABLED: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef 168;. 
169define weak void @__omp_offloading_fd02_2044372e_sequential_loop_l5() #0 { 170; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 171; AMDGPU-SAME: () #[[ATTR0:[0-9]+]] { 172; AMDGPU-NEXT: entry: 173; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 174; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 175; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 176; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 177; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 178; AMDGPU: common.ret: 179; AMDGPU-NEXT: ret void 180; AMDGPU: user_code.entry: 181; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 182; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 183; AMDGPU-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 184; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 185; AMDGPU-NEXT: br label [[COMMON_RET]] 186; 187; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 188; NVPTX-SAME: () #[[ATTR0:[0-9]+]] { 189; NVPTX-NEXT: entry: 190; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 191; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 192; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 193; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 194; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 195; NVPTX: common.ret: 196; NVPTX-NEXT: ret void 197; NVPTX: user_code.entry: 198; NVPTX-NEXT: [[TMP1:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 199; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 200; NVPTX-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 201; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 202; NVPTX-NEXT: br label [[COMMON_RET]] 203; 204; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 205; AMDGPU-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { 206; AMDGPU-DISABLED-NEXT: entry: 207; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 208; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 209; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 210; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 211; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 212; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 213; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 214; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 215; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 216; AMDGPU-DISABLED: is_worker_check: 217; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 218; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 219; AMDGPU-DISABLED: worker_state_machine.begin: 220; AMDGPU-DISABLED-NEXT: call void 
@__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 221; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 222; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 223; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 224; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 225; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 226; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 227; AMDGPU-DISABLED: worker_state_machine.finished: 228; AMDGPU-DISABLED-NEXT: ret void 229; AMDGPU-DISABLED: worker_state_machine.is_active.check: 230; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 231; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 232; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*) 233; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 234; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 235; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 236; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 237; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 238; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 239; AMDGPU-DISABLED-NEXT: br label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 240; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 241; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 242; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 243; AMDGPU-DISABLED: worker_state_machine.done.barrier: 244; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 245; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 246; AMDGPU-DISABLED: thread.user_code.check: 247; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 248; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 249; AMDGPU-DISABLED: common.ret: 250; AMDGPU-DISABLED-NEXT: ret void 251; AMDGPU-DISABLED: user_code.entry: 252; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 253; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 254; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 255; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 256; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 257; 258; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5 259; NVPTX-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { 260; NVPTX-DISABLED-NEXT: entry: 261; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 262; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 263; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 264; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 265; 
NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 266; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 267; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 268; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 269; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 270; NVPTX-DISABLED: is_worker_check: 271; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 272; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 273; NVPTX-DISABLED: worker_state_machine.begin: 274; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 275; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 276; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 277; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 278; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 279; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 280; NVPTX-DISABLED: worker_state_machine.finished: 281; NVPTX-DISABLED-NEXT: ret void 282; NVPTX-DISABLED: worker_state_machine.is_active.check: 283; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 284; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 285; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* 
@__omp_outlined__1_wrapper.ID to void (i16, i32)*) 286; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 287; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 288; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP0]]) 289; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 290; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 291; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 292; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 293; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 294; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 295; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 296; NVPTX-DISABLED: worker_state_machine.done.barrier: 297; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 298; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 299; NVPTX-DISABLED: thread.user_code.check: 300; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 301; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 302; NVPTX-DISABLED: common.ret: 303; NVPTX-DISABLED-NEXT: ret void 304; NVPTX-DISABLED: user_code.entry: 305; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] 306; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] 307; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 308; 
NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 309; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 310; 311entry: 312 %.zero.addr = alloca i32, align 4 313 %.threadid_temp. = alloca i32, align 4 314 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 315 %exec_user_code = icmp eq i32 %0, -1 316 br i1 %exec_user_code, label %user_code.entry, label %common.ret 317 318common.ret: ; preds = %entry, %user_code.entry 319 ret void 320 321user_code.entry: ; preds = %entry 322 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 323 store i32 0, i32* %.zero.addr, align 4 324 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 325 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #6 326 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 327 br label %common.ret 328} 329 330; Function Attrs: alwaysinline convergent norecurse nounwind 331define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { 332; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ 333; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 334; AMDGPU-NEXT: entry: 335; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 336; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 337; AMDGPU: for.cond: 338; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 339; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 340; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 341; AMDGPU: for.cond.cleanup: 342; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 343; AMDGPU-NEXT: ret void 344; AMDGPU: for.body: 345; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 346; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 347; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) 348; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 349; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 350; 351; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ 352; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 353; NVPTX-NEXT: entry: 354; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 355; NVPTX-NEXT: br label [[FOR_COND:%.*]] 356; NVPTX: for.cond: 357; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], 
[[FOR_BODY:%.*]] ] 358; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 359; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 360; NVPTX: for.cond.cleanup: 361; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 362; NVPTX-NEXT: ret void 363; NVPTX: for.body: 364; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 365; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 366; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) 367; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 368; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 369; 370; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ 371; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 372; AMDGPU-DISABLED-NEXT: entry: 373; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 374; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] 375; AMDGPU-DISABLED: for.cond: 376; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 377; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 378; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 379; AMDGPU-DISABLED: for.cond.cleanup: 380; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 381; AMDGPU-DISABLED-NEXT: ret void 382; AMDGPU-DISABLED: for.body: 383; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 384; 
AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 385; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef @__omp_outlined__1_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) 386; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 387; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 388; 389; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ 390; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 391; NVPTX-DISABLED-NEXT: entry: 392; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 393; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] 394; NVPTX-DISABLED: for.cond: 395; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 396; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 397; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 398; NVPTX-DISABLED: for.cond.cleanup: 399; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] 400; NVPTX-DISABLED-NEXT: ret void 401; NVPTX-DISABLED: for.body: 402; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 403; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 404; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef @__omp_outlined__1_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) 405; NVPTX-DISABLED-NEXT: 
[[INC]] = add nsw i32 [[I_0]], 1 406; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] 407; 408entry: 409 %captured_vars_addrs = alloca [0 x i8*], align 8 410 br label %for.cond 411 412for.cond: ; preds = %for.body, %entry 413 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 414 %cmp = icmp slt i32 %i.0, 100 415 br i1 %cmp, label %for.body, label %for.cond.cleanup 416 417for.cond.cleanup: ; preds = %for.cond 418 call void @spmd_amenable() #10 419 ret void 420 421for.body: ; preds = %for.cond 422 %0 = load i32, i32* %.global_tid., align 4, !tbaa !18 423 %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** 424 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %1, i64 0) 425 %inc = add nsw i32 %i.0, 1 426 br label %for.cond, !llvm.loop !22 427} 428 429; Function Attrs: alwaysinline convergent norecurse nounwind 430define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { 431; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 432; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 433; AMDGPU-NEXT: entry: 434; AMDGPU-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 435; AMDGPU-NEXT: ret void 436; 437; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 438; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 439; NVPTX-NEXT: entry: 440; NVPTX-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 441; NVPTX-NEXT: ret void 442; 443; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 444; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 445; AMDGPU-DISABLED-NEXT: entry: 446; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 447; AMDGPU-DISABLED-NEXT: ret void 448; 449; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 450; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 451; NVPTX-DISABLED-NEXT: entry: 452; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] 453; NVPTX-DISABLED-NEXT: ret void 454; 455entry: 456 call void @unknown() #11 457 ret void 458} 459 460; Function Attrs: convergent norecurse nounwind 461define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { 462; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 463; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 464; AMDGPU-NEXT: entry: 465; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 466; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 467; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 468; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** 
[[GLOBAL_ARGS]]) 469; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 470; AMDGPU-NEXT: ret void 471; 472; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 473; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 474; NVPTX-NEXT: entry: 475; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 476; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 477; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 478; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 479; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 480; NVPTX-NEXT: ret void 481; 482; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 483; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 484; AMDGPU-DISABLED-NEXT: entry: 485; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 486; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 487; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 488; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 489; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 490; AMDGPU-DISABLED-NEXT: ret void 491; 492; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 493; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { 494; NVPTX-DISABLED-NEXT: entry: 495; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 496; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 497; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 498; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 499; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 500; NVPTX-DISABLED-NEXT: ret void 501; 502entry: 503 %.addr1 = 
alloca i32, align 4 504 %.zero.addr = alloca i32, align 4 505 %global_args = alloca i8**, align 8 506 store i32 %1, i32* %.addr1, align 4, !tbaa !18 507 store i32 0, i32* %.zero.addr, align 4 508 call void @__kmpc_get_shared_variables(i8*** %global_args) 509 call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) #6 510 ret void 511} 512 513; Function Attrs: alwaysinline convergent norecurse nounwind 514define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20() #0 { 515; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 516; AMDGPU-SAME: () #[[ATTR0]] { 517; AMDGPU-NEXT: entry: 518; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 519; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 520; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 521; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 522; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 523; AMDGPU: common.ret: 524; AMDGPU-NEXT: ret void 525; AMDGPU: user_code.entry: 526; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 527; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 528; AMDGPU-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 529; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 530; AMDGPU-NEXT: br label [[COMMON_RET]] 531; 532; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 533; NVPTX-SAME: () #[[ATTR0]] { 534; NVPTX-NEXT: entry: 535; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 536; NVPTX-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 537; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 538; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 539; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 540; NVPTX: common.ret: 541; NVPTX-NEXT: ret void 542; NVPTX: user_code.entry: 543; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 544; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 545; NVPTX-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 546; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 547; NVPTX-NEXT: br label [[COMMON_RET]] 548; 549; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 550; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 551; AMDGPU-DISABLED-NEXT: entry: 552; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 553; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 554; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 555; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 556; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 557; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 558; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 559; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 560; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label 
[[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 561; AMDGPU-DISABLED: is_worker_check: 562; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 563; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 564; AMDGPU-DISABLED: worker_state_machine.begin: 565; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 566; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 567; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 568; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 569; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 570; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 571; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 572; AMDGPU-DISABLED: worker_state_machine.finished: 573; AMDGPU-DISABLED-NEXT: ret void 574; AMDGPU-DISABLED: worker_state_machine.is_active.check: 575; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 576; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 577; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*) 578; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 579; AMDGPU-DISABLED: 
worker_state_machine.parallel_region.execute: 580; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 581; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 582; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 583; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 584; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 585; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 586; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 587; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 588; AMDGPU-DISABLED: worker_state_machine.done.barrier: 589; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 590; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 591; AMDGPU-DISABLED: thread.user_code.check: 592; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 593; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 594; AMDGPU-DISABLED: common.ret: 595; AMDGPU-DISABLED-NEXT: ret void 596; AMDGPU-DISABLED: user_code.entry: 597; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 598; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 599; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 600; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 601; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 602; 603; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 604; 
NVPTX-DISABLED-SAME: () #[[ATTR0]] { 605; NVPTX-DISABLED-NEXT: entry: 606; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 607; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 608; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 609; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 610; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 611; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 612; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 613; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 614; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 615; NVPTX-DISABLED: is_worker_check: 616; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 617; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 618; NVPTX-DISABLED: worker_state_machine.begin: 619; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 620; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 621; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 622; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 623; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 624; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 625; NVPTX-DISABLED: worker_state_machine.finished: 626; NVPTX-DISABLED-NEXT: ret void 627; NVPTX-DISABLED: 
worker_state_machine.is_active.check: 628; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 629; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 630; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__3_wrapper.ID to void (i16, i32)*) 631; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 632; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 633; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]]) 634; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 635; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 636; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 637; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 638; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 639; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 640; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 641; NVPTX-DISABLED: worker_state_machine.done.barrier: 642; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 643; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 644; NVPTX-DISABLED: thread.user_code.check: 645; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 646; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 647; NVPTX-DISABLED: common.ret: 648; NVPTX-DISABLED-NEXT: ret void 649; NVPTX-DISABLED: user_code.entry: 650; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 
#[[ATTR3]] 651; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 652; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 653; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 654; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 655; 656entry: 657 %.zero.addr = alloca i32, align 4 658 %.threadid_temp. = alloca i32, align 4 659 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 660 %exec_user_code = icmp eq i32 %0, -1 661 br i1 %exec_user_code, label %user_code.entry, label %common.ret 662 663common.ret: ; preds = %entry, %user_code.entry 664 ret void 665 666user_code.entry: ; preds = %entry 667 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 668 store i32 0, i32* %.zero.addr, align 4 669 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 670 call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #6 671 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 672 br label %common.ret 673} 674 675; Function Attrs: alwaysinline convergent norecurse nounwind 676define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { 677; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 678; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 679; AMDGPU-NEXT: entry: 680; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 681; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 682; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* 683; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 684; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 685; AMDGPU: for.cond: 686; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 687; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 688; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 689; AMDGPU: for.cond.cleanup: 690; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 691; AMDGPU-NEXT: ret void 692; AMDGPU: for.body: 693; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 694; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 695; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) 696; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 697; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 698; 699; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 700; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 701; NVPTX-NEXT: entry: 702; NVPTX-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 703; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 704; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* 705; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 706; NVPTX-NEXT: br label [[FOR_COND:%.*]] 707; NVPTX: for.cond: 708; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 709; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 710; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 711; NVPTX: for.cond.cleanup: 712; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 713; NVPTX-NEXT: ret void 714; NVPTX: for.body: 715; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 716; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 717; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) 718; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 719; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 720; 721; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 722; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 723; AMDGPU-DISABLED-NEXT: entry: 724; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 725; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 726; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* 727; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 728; AMDGPU-DISABLED-NEXT: br 
label [[FOR_COND:%.*]] 729; AMDGPU-DISABLED: for.cond: 730; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 731; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 732; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 733; AMDGPU-DISABLED: for.cond.cleanup: 734; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 735; AMDGPU-DISABLED-NEXT: ret void 736; AMDGPU-DISABLED: for.body: 737; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 738; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 739; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) 740; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 741; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 742; 743; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 744; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 745; NVPTX-DISABLED-NEXT: entry: 746; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 747; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 748; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* 749; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] 750; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] 751; NVPTX-DISABLED: for.cond: 752; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 753; NVPTX-DISABLED-NEXT: 
[[CMP:%.*]] = icmp slt i32 [[I_0]], 100 754; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 755; NVPTX-DISABLED: for.cond.cleanup: 756; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 757; NVPTX-DISABLED-NEXT: ret void 758; NVPTX-DISABLED: for.body: 759; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 760; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 761; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) 762; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 763; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] 764; 765entry: 766 %captured_vars_addrs = alloca [0 x i8*], align 8 767 %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) 768 %x_on_stack = bitcast i8* %x to i32* 769 call void @use(i32* nocapture %x_on_stack) #10 770 br label %for.cond 771 772for.cond: ; preds = %for.body, %entry 773 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 774 %cmp = icmp slt i32 %i.0, 100 775 br i1 %cmp, label %for.body, label %for.cond.cleanup 776 777for.cond.cleanup: ; preds = %for.cond 778 call void @spmd_amenable() #10 779 call void @__kmpc_free_shared(i8* %x, i64 4) 780 ret void 781 782for.body: ; preds = %for.cond 783 %0 = load i32, i32* %.global_tid., align 4, !tbaa !18 784 %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** 785 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0) 786 %inc = add nsw i32 %i.0, 1 787 br label %for.cond, !llvm.loop !25 788} 789; Function Attrs: alwaysinline 
convergent norecurse nounwind 790define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 791; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 792; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 793; AMDGPU-NEXT: entry: 794; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 795; AMDGPU-NEXT: ret void 796; 797; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 798; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 799; NVPTX-NEXT: entry: 800; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 801; NVPTX-NEXT: ret void 802; 803; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 804; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 805; AMDGPU-DISABLED-NEXT: entry: 806; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 807; AMDGPU-DISABLED-NEXT: ret void 808; 809; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 810; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 811; NVPTX-DISABLED-NEXT: entry: 812; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 813; NVPTX-DISABLED-NEXT: ret void 814; 815entry: 816 call void @unknown() #11 817 ret void 818} 819 820; Function Attrs: convergent norecurse nounwind 821define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { 822; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 823; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 824; AMDGPU-NEXT: entry: 825; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 826; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 827; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = 
alloca i8**, align 8 828; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 829; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 830; AMDGPU-NEXT: ret void 831; 832; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 833; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 834; NVPTX-NEXT: entry: 835; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 836; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 837; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 838; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 839; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 840; NVPTX-NEXT: ret void 841; 842; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 843; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 844; AMDGPU-DISABLED-NEXT: entry: 845; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 846; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 847; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 848; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 849; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 850; AMDGPU-DISABLED-NEXT: ret void 851; 852; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 853; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 854; NVPTX-DISABLED-NEXT: entry: 855; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 856; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 857; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 858; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 859; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] 
860; NVPTX-DISABLED-NEXT: ret void 861; 862entry: 863 %.addr1 = alloca i32, align 4 864 %.zero.addr = alloca i32, align 4 865 %global_args = alloca i8**, align 8 866 store i32 %1, i32* %.addr1, align 4, !tbaa !18 867 store i32 0, i32* %.zero.addr, align 4 868 call void @__kmpc_get_shared_variables(i8*** %global_args) 869 call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #6 870 ret void 871} 872 873 874; Function Attrs: alwaysinline convergent norecurse nounwind 875define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35() #0 { 876; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 877; AMDGPU-SAME: () #[[ATTR0]] { 878; AMDGPU-NEXT: entry: 879; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 880; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 881; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 882; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 883; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 884; AMDGPU: common.ret: 885; AMDGPU-NEXT: ret void 886; AMDGPU: user_code.entry: 887; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 888; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 889; AMDGPU-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 890; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 891; AMDGPU-NEXT: br label [[COMMON_RET]] 892; 893; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 894; NVPTX-SAME: () #[[ATTR0]] { 895; NVPTX-NEXT: entry: 896; NVPTX-NEXT: 
[[DOTZERO_ADDR:%.*]] = alloca i32, align 4 897; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 898; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 899; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 900; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 901; NVPTX: common.ret: 902; NVPTX-NEXT: ret void 903; NVPTX: user_code.entry: 904; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 905; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 906; NVPTX-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 907; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 908; NVPTX-NEXT: br label [[COMMON_RET]] 909; 910; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 911; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 912; AMDGPU-DISABLED-NEXT: entry: 913; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 914; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 915; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 916; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 917; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 918; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 919; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 920; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 921; 
AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 922; AMDGPU-DISABLED: is_worker_check: 923; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 924; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 925; AMDGPU-DISABLED: worker_state_machine.begin: 926; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 927; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 928; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 929; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 930; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 931; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 932; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 933; AMDGPU-DISABLED: worker_state_machine.finished: 934; AMDGPU-DISABLED-NEXT: ret void 935; AMDGPU-DISABLED: worker_state_machine.is_active.check: 936; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 937; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 938; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) 939; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 940; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 941; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 942; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 943; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 944; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 945; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 946; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 947; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 948; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 949; AMDGPU-DISABLED: worker_state_machine.done.barrier: 950; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 951; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 952; AMDGPU-DISABLED: thread.user_code.check: 953; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 954; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 955; AMDGPU-DISABLED: common.ret: 956; AMDGPU-DISABLED-NEXT: ret void 957; AMDGPU-DISABLED: user_code.entry: 958; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 959; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 960; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 961; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 962; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 963; 964; NVPTX-DISABLED-LABEL: define 
{{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 965; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 966; NVPTX-DISABLED-NEXT: entry: 967; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 968; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 969; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 970; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 971; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 972; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 973; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 974; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 975; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 976; NVPTX-DISABLED: is_worker_check: 977; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 978; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 979; NVPTX-DISABLED: worker_state_machine.begin: 980; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 981; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 982; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 983; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 984; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 985; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 986; NVPTX-DISABLED: 
worker_state_machine.finished: 987; NVPTX-DISABLED-NEXT: ret void 988; NVPTX-DISABLED: worker_state_machine.is_active.check: 989; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 990; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 991; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__5_wrapper.ID to void (i16, i32)*) 992; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 993; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 994; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]]) 995; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 996; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 997; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 998; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 999; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 1000; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1001; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1002; NVPTX-DISABLED: worker_state_machine.done.barrier: 1003; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1004; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1005; NVPTX-DISABLED: thread.user_code.check: 1006; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1007; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1008; NVPTX-DISABLED: common.ret: 1009; NVPTX-DISABLED-NEXT: ret void 1010; NVPTX-DISABLED: user_code.entry: 1011; 
NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1012; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1013; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1014; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1015; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 1016; 1017entry: 1018 %.zero.addr = alloca i32, align 4 1019 %.threadid_temp. = alloca i32, align 4 1020 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 1021 %exec_user_code = icmp eq i32 %0, -1 1022 br i1 %exec_user_code, label %user_code.entry, label %common.ret 1023 1024common.ret: ; preds = %entry, %user_code.entry 1025 ret void 1026 1027user_code.entry: ; preds = %entry 1028 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 1029 store i32 0, i32* %.zero.addr, align 4 1030 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 1031 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #6 1032 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 1033 br label %common.ret 1034} 1035 1036; Function Attrs: alwaysinline convergent norecurse nounwind 1037define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { 1038; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 1039; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1040; AMDGPU-NEXT: entry: 1041; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1042; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 1043; AMDGPU: for.cond: 1044; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1045; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1046; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1047; AMDGPU: for.cond.cleanup: 1048; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 1049; AMDGPU-NEXT: ret void 1050; AMDGPU: for.body: 1051; AMDGPU-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1052; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1053; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1054; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1055; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1) 1056; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1057; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1058; 1059; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 1060; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias 
nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1061; NVPTX-NEXT: entry: 1062; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1063; NVPTX-NEXT: br label [[FOR_COND:%.*]] 1064; NVPTX: for.cond: 1065; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1066; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1067; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1068; NVPTX: for.cond.cleanup: 1069; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 1070; NVPTX-NEXT: ret void 1071; NVPTX: for.body: 1072; NVPTX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1073; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1074; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1075; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1076; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1) 1077; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1078; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1079; 1080; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 1081; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1082; AMDGPU-DISABLED-NEXT: entry: 1083; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x 
i8*], align 8 1084; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] 1085; AMDGPU-DISABLED: for.cond: 1086; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1087; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1088; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1089; AMDGPU-DISABLED: for.cond.cleanup: 1090; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1091; AMDGPU-DISABLED-NEXT: ret void 1092; AMDGPU-DISABLED: for.body: 1093; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1094; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1095; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1096; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1097; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) 1098; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1099; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1100; 1101; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 1102; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1103; NVPTX-DISABLED-NEXT: entry: 1104; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1105; NVPTX-DISABLED-NEXT: br label 
[[FOR_COND:%.*]] 1106; NVPTX-DISABLED: for.cond: 1107; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1108; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1109; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1110; NVPTX-DISABLED: for.cond.cleanup: 1111; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1112; NVPTX-DISABLED-NEXT: ret void 1113; NVPTX-DISABLED: for.body: 1114; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1115; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] 1116; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1117; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1118; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) 1119; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1120; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] 1121; 1122entry: 1123 %captured_vars_addrs = alloca [1 x i8*], align 8 1124 %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) 1125 %x_on_stack = bitcast i8* %x to i32* 1126 br label %for.cond 1127 1128for.cond: ; preds = %for.body, %entry 1129 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 1130 %cmp = icmp slt i32 %i.0, 100 1131 br i1 %cmp, label %for.body, label %for.cond.cleanup 1132 1133for.cond.cleanup: ; preds = %for.cond 1134 call void @spmd_amenable() #10 1135 call void @__kmpc_free_shared(i8* %x, i64 4) 1136 ret void 1137 
1138for.body: ; preds = %for.cond 1139 %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0 1140 store i8* %x, i8** %0, align 8, !tbaa !26 1141 %1 = load i32, i32* %.global_tid., align 4, !tbaa !18 1142 %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8** 1143 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 1) 1144 %inc = add nsw i32 %i.0, 1 1145 br label %for.cond, !llvm.loop !28 1146} 1147 1148; Function Attrs: alwaysinline convergent norecurse nounwind 1149define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { 1150; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 1151; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1152; AMDGPU-NEXT: entry: 1153; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1154; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1155; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1156; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 1157; AMDGPU-NEXT: ret void 1158; 1159; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 1160; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1161; NVPTX-NEXT: entry: 1162; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1163; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1164; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1165; NVPTX-NEXT: call void @unknown() 
#[[ATTR7]] 1166; NVPTX-NEXT: ret void 1167; 1168; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 1169; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1170; AMDGPU-DISABLED-NEXT: entry: 1171; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1172; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1173; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1174; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1175; AMDGPU-DISABLED-NEXT: ret void 1176; 1177; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 1178; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1179; NVPTX-DISABLED-NEXT: entry: 1180; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1181; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1182; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1183; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1184; NVPTX-DISABLED-NEXT: ret void 1185; 1186entry: 1187 %0 = load i32, i32* %x, align 4, !tbaa !18 1188 %inc = add nsw i32 %0, 1 1189 store i32 %inc, i32* %x, align 4, !tbaa !18 1190 call void @unknown() #11 1191 ret void 1192} 1193 1194; Function Attrs: convergent norecurse nounwind 1195define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { 1196; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1197; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1198; AMDGPU-NEXT: entry: 1199; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1200; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] 
= alloca i32, align 4 1201; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1202; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1203; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1204; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1205; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1206; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1207; AMDGPU-NEXT: ret void 1208; 1209; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1210; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1211; NVPTX-NEXT: entry: 1212; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1213; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1214; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1215; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1216; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1217; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1218; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1219; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1220; NVPTX-NEXT: ret void 1221; 1222; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1223; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1224; AMDGPU-DISABLED-NEXT: entry: 1225; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1226; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1227; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1228; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1229; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1230; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1231; 
AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1232; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1233; AMDGPU-DISABLED-NEXT: ret void 1234; 1235; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper 1236; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1237; NVPTX-DISABLED-NEXT: entry: 1238; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1239; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1240; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1241; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1242; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1243; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1244; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1245; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1246; NVPTX-DISABLED-NEXT: ret void 1247; 1248entry: 1249 %.addr1 = alloca i32, align 4 1250 %.zero.addr = alloca i32, align 4 1251 %global_args = alloca i8**, align 8 1252 store i32 %1, i32* %.addr1, align 4, !tbaa !18 1253 store i32 0, i32* %.zero.addr, align 4 1254 call void @__kmpc_get_shared_variables(i8*** %global_args) 1255 %2 = load i8**, i8*** %global_args, align 8 1256 %3 = bitcast i8** %2 to i32** 1257 %4 = load i32*, i32** %3, align 8, !tbaa !26 1258 call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr, i32* %4) #6 1259 ret void 1260} 1261 1262; Function Attrs: alwaysinline convergent norecurse nounwind 1263define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50() #0 { 1264; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1265; AMDGPU-SAME: () #[[ATTR0]] { 
1266; AMDGPU-NEXT: entry: 1267; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1268; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1269; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 1270; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1271; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1272; AMDGPU: common.ret: 1273; AMDGPU-NEXT: ret void 1274; AMDGPU: user_code.entry: 1275; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1276; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1277; AMDGPU-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1278; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 1279; AMDGPU-NEXT: br label [[COMMON_RET]] 1280; 1281; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1282; NVPTX-SAME: () #[[ATTR0]] { 1283; NVPTX-NEXT: entry: 1284; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1285; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1286; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) 1287; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1288; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1289; NVPTX: common.ret: 1290; NVPTX-NEXT: ret void 1291; NVPTX: user_code.entry: 1292; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1293; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1294; 
NVPTX-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1295; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) 1296; NVPTX-NEXT: br label [[COMMON_RET]] 1297; 1298; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1299; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 1300; AMDGPU-DISABLED-NEXT: entry: 1301; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1302; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1303; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1304; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1305; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1306; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1307; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1308; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1309; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1310; AMDGPU-DISABLED: is_worker_check: 1311; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1312; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1313; AMDGPU-DISABLED: worker_state_machine.begin: 1314; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1315; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* 
[[WORKER_WORK_FN_ADDR]] to i8** 1316; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1317; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1318; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1319; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1320; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1321; AMDGPU-DISABLED: worker_state_machine.finished: 1322; AMDGPU-DISABLED-NEXT: ret void 1323; AMDGPU-DISABLED: worker_state_machine.is_active.check: 1324; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1325; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 1326; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) 1327; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1328; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 1329; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1330; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1331; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1332; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1333; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1334; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 1335; AMDGPU-DISABLED-NEXT: call void 
@__kmpc_kernel_end_parallel() 1336; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1337; AMDGPU-DISABLED: worker_state_machine.done.barrier: 1338; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1339; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1340; AMDGPU-DISABLED: thread.user_code.check: 1341; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1342; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1343; AMDGPU-DISABLED: common.ret: 1344; AMDGPU-DISABLED-NEXT: ret void 1345; AMDGPU-DISABLED: user_code.entry: 1346; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1347; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1348; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1349; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1350; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 1351; 1352; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 1353; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 1354; NVPTX-DISABLED-NEXT: entry: 1355; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1356; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1357; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1358; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1359; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1360; 
NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1361; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1362; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1363; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1364; NVPTX-DISABLED: is_worker_check: 1365; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1366; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1367; NVPTX-DISABLED: worker_state_machine.begin: 1368; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1369; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1370; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1371; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1372; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1373; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1374; NVPTX-DISABLED: worker_state_machine.finished: 1375; NVPTX-DISABLED-NEXT: ret void 1376; NVPTX-DISABLED: worker_state_machine.is_active.check: 1377; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1378; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 1379; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__7_wrapper.ID to void (i16, i32)*) 1380; NVPTX-DISABLED-NEXT: br i1 
[[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1381; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 1382; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]]) 1383; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1384; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1385; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1386; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1387; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 1388; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1389; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1390; NVPTX-DISABLED: worker_state_machine.done.barrier: 1391; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1392; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1393; NVPTX-DISABLED: thread.user_code.check: 1394; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1395; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1396; NVPTX-DISABLED: common.ret: 1397; NVPTX-DISABLED-NEXT: ret void 1398; NVPTX-DISABLED: user_code.entry: 1399; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1400; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] 1401; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1402; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 
true) 1403; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 1404; 1405entry: 1406 %.zero.addr = alloca i32, align 4 1407 %.threadid_temp. = alloca i32, align 4 1408 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 1409 %exec_user_code = icmp eq i32 %0, -1 1410 br i1 %exec_user_code, label %user_code.entry, label %common.ret 1411 1412common.ret: ; preds = %entry, %user_code.entry 1413 ret void 1414 1415user_code.entry: ; preds = %entry 1416 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 1417 store i32 0, i32* %.zero.addr, align 4 1418 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 1419 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #6 1420 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 1421 br label %common.ret 1422} 1423 1424; Function Attrs: alwaysinline convergent norecurse nounwind 1425define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { 1426; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 1427; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1428; AMDGPU-NEXT: entry: 1429; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1430; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32* 1431; AMDGPU-NEXT: br label [[REGION_CHECK_TID:%.*]] 1432; AMDGPU: region.check.tid: 1433; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1434; AMDGPU-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 1435; AMDGPU-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] 1436; AMDGPU: region.guarded: 1437; AMDGPU-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 
1438; AMDGPU-NEXT: br label [[REGION_GUARDED_END:%.*]] 1439; AMDGPU: region.guarded.end: 1440; AMDGPU-NEXT: br label [[REGION_BARRIER]] 1441; AMDGPU: region.barrier: 1442; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) 1443; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] 1444; AMDGPU: region.exit: 1445; AMDGPU-NEXT: br label [[FOR_COND:%.*]] 1446; AMDGPU: for.cond: 1447; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1448; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1449; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1450; AMDGPU: for.cond.cleanup: 1451; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 1452; AMDGPU-NEXT: ret void 1453; AMDGPU: for.body: 1454; AMDGPU-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1455; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] 1456; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1457; AMDGPU-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1458; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1) 1459; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1460; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1461; 1462; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 1463; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) 
[[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1464; NVPTX-NEXT: entry: 1465; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1466; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32* 1467; NVPTX-NEXT: br label [[REGION_CHECK_TID:%.*]] 1468; NVPTX: region.check.tid: 1469; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1470; NVPTX-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 1471; NVPTX-NEXT: br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] 1472; NVPTX: region.guarded: 1473; NVPTX-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 1474; NVPTX-NEXT: br label [[REGION_GUARDED_END:%.*]] 1475; NVPTX: region.guarded.end: 1476; NVPTX-NEXT: br label [[REGION_BARRIER]] 1477; NVPTX: region.barrier: 1478; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) 1479; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] 1480; NVPTX: region.exit: 1481; NVPTX-NEXT: br label [[FOR_COND:%.*]] 1482; NVPTX: for.cond: 1483; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[REGION_EXIT]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1484; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1485; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1486; NVPTX: for.cond.cleanup: 1487; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 1488; NVPTX-NEXT: ret void 1489; NVPTX: for.body: 1490; NVPTX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1491; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] 1492; NVPTX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1493; NVPTX-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1494; 
NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1) 1495; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1496; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1497; 1498; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 1499; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1500; AMDGPU-DISABLED-NEXT: entry: 1501; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1502; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32* 1503; AMDGPU-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 1504; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] 1505; AMDGPU-DISABLED: for.cond: 1506; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1507; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1508; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1509; AMDGPU-DISABLED: for.cond.cleanup: 1510; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1511; AMDGPU-DISABLED-NEXT: ret void 1512; AMDGPU-DISABLED: for.body: 1513; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1514; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, 
!tbaa [[TBAA26]] 1515; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1516; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1517; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) 1518; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1519; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1520; 1521; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 1522; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1523; NVPTX-DISABLED-NEXT: entry: 1524; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 1525; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32* 1526; NVPTX-DISABLED-NEXT: store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]] 1527; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] 1528; NVPTX-DISABLED: for.cond: 1529; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 1530; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 1531; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] 1532; NVPTX-DISABLED: for.cond.cleanup: 1533; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 1534; NVPTX-DISABLED-NEXT: ret void 1535; NVPTX-DISABLED: for.body: 1536; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1537; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]] 1538; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] 1539; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1540; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) 1541; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 1542; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] 1543; 1544entry: 1545 %captured_vars_addrs = alloca [1 x i8*], align 8 1546 %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) 1547 %x_on_stack = bitcast i8* %x to i32* 1548 store i32 42, i32* %x_on_stack, align 4, !tbaa !18 1549 br label %for.cond 1550 1551for.cond: ; preds = %for.body, %entry 1552 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 1553 %cmp = icmp slt i32 %i.0, 100 1554 br i1 %cmp, label %for.body, label %for.cond.cleanup 1555 1556for.cond.cleanup: ; preds = %for.cond 1557 call void @spmd_amenable() #10 1558 call void @__kmpc_free_shared(i8* %x, i64 4) 1559 ret void 1560 1561for.body: ; preds = %for.cond 1562 %0 = getelementptr inbounds [1 x i8*], [1 x i8*]* %captured_vars_addrs, i64 0, i64 0 1563 store i8* %x, i8** %0, align 8, !tbaa !26 1564 %1 = load i32, i32* %.global_tid., align 4, !tbaa !18 1565 %2 = bitcast [1 x i8*]* %captured_vars_addrs to i8** 1566 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 1) 
1567 %inc = add nsw i32 %i.0, 1 1568 br label %for.cond, !llvm.loop !29 1569} 1570 1571; Function Attrs: alwaysinline convergent norecurse nounwind 1572define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { 1573; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 1574; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1575; AMDGPU-NEXT: entry: 1576; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1577; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1578; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1579; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 1580; AMDGPU-NEXT: ret void 1581; 1582; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 1583; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1584; NVPTX-NEXT: entry: 1585; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1586; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1587; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1588; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 1589; NVPTX-NEXT: ret void 1590; 1591; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 1592; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1593; AMDGPU-DISABLED-NEXT: entry: 1594; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1595; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add 
nsw i32 [[TMP0]], 1 1596; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1597; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1598; AMDGPU-DISABLED-NEXT: ret void 1599; 1600; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 1601; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { 1602; NVPTX-DISABLED-NEXT: entry: 1603; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] 1604; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 1605; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] 1606; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1607; NVPTX-DISABLED-NEXT: ret void 1608; 1609entry: 1610 %0 = load i32, i32* %x, align 4, !tbaa !18 1611 %inc = add nsw i32 %0, 1 1612 store i32 %inc, i32* %x, align 4, !tbaa !18 1613 call void @unknown() #11 1614 ret void 1615} 1616 1617; Function Attrs: convergent norecurse nounwind 1618define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { 1619; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1620; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1621; AMDGPU-NEXT: entry: 1622; AMDGPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1623; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1624; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1625; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1626; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1627; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1628; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1629; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1630; 
AMDGPU-NEXT: ret void 1631; 1632; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1633; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1634; NVPTX-NEXT: entry: 1635; NVPTX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1636; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1637; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1638; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1639; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1640; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1641; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1642; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1643; NVPTX-NEXT: ret void 1644; 1645; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1646; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1647; AMDGPU-DISABLED-NEXT: entry: 1648; AMDGPU-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1649; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1650; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1651; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1652; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1653; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1654; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1655; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1656; AMDGPU-DISABLED-NEXT: ret void 1657; 1658; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper 1659; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { 1660; NVPTX-DISABLED-NEXT: entry: 1661; NVPTX-DISABLED-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1662; 
NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1663; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1664; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1665; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1666; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** 1667; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] 1668; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] 1669; NVPTX-DISABLED-NEXT: ret void 1670; 1671entry: 1672 %.addr1 = alloca i32, align 4 1673 %.zero.addr = alloca i32, align 4 1674 %global_args = alloca i8**, align 8 1675 store i32 %1, i32* %.addr1, align 4, !tbaa !18 1676 store i32 0, i32* %.zero.addr, align 4 1677 call void @__kmpc_get_shared_variables(i8*** %global_args) 1678 %2 = load i8**, i8*** %global_args, align 8 1679 %3 = bitcast i8** %2 to i32** 1680 %4 = load i32*, i32** %3, align 8, !tbaa !26 1681 call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr, i32* %4) #6 1682 ret void 1683} 1684 1685; Function Attrs: alwaysinline convergent norecurse nounwind 1686define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 { 1687; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1688; AMDGPU-SAME: () #[[ATTR0]] { 1689; AMDGPU-NEXT: entry: 1690; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1691; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1692; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1693; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1694; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1695; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1696; AMDGPU-NEXT: 
[[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1697; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1698; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1699; AMDGPU: is_worker_check: 1700; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1701; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1702; AMDGPU: worker_state_machine.begin: 1703; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1704; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1705; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1706; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1707; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1708; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1709; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1710; AMDGPU: worker_state_machine.finished: 1711; AMDGPU-NEXT: ret void 1712; AMDGPU: worker_state_machine.is_active.check: 1713; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1714; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 1715; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1716; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1717; AMDGPU: worker_state_machine.parallel_region.end: 1718; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1719; AMDGPU-NEXT: br label 
[[WORKER_STATE_MACHINE_DONE_BARRIER]] 1720; AMDGPU: worker_state_machine.done.barrier: 1721; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1722; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1723; AMDGPU: thread.user_code.check: 1724; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1725; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1726; AMDGPU: common.ret: 1727; AMDGPU-NEXT: ret void 1728; AMDGPU: user_code.entry: 1729; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1730; AMDGPU-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1731; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1732; AMDGPU-NEXT: br label [[COMMON_RET]] 1733; 1734; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1735; NVPTX-SAME: () #[[ATTR0]] { 1736; NVPTX-NEXT: entry: 1737; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1738; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1739; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1740; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1741; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1742; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1743; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1744; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1745; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1746; NVPTX: 
is_worker_check: 1747; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1748; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1749; NVPTX: worker_state_machine.begin: 1750; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1751; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1752; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1753; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1754; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1755; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1756; NVPTX: worker_state_machine.finished: 1757; NVPTX-NEXT: ret void 1758; NVPTX: worker_state_machine.is_active.check: 1759; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1760; NVPTX: worker_state_machine.parallel_region.fallback.execute: 1761; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1762; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1763; NVPTX: worker_state_machine.parallel_region.end: 1764; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 1765; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1766; NVPTX: worker_state_machine.done.barrier: 1767; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1768; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1769; NVPTX: thread.user_code.check: 1770; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1771; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1772; NVPTX: common.ret: 
1773; NVPTX-NEXT: ret void 1774; NVPTX: user_code.entry: 1775; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1776; NVPTX-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1777; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1778; NVPTX-NEXT: br label [[COMMON_RET]] 1779; 1780; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1781; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 1782; AMDGPU-DISABLED-NEXT: entry: 1783; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1784; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1785; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1786; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1787; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1788; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1789; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1790; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1791; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1792; AMDGPU-DISABLED: is_worker_check: 1793; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1794; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1795; AMDGPU-DISABLED: worker_state_machine.begin: 1796; AMDGPU-DISABLED-NEXT: call void 
@__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1797; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1798; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1799; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1800; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1801; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1802; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1803; AMDGPU-DISABLED: worker_state_machine.finished: 1804; AMDGPU-DISABLED-NEXT: ret void 1805; AMDGPU-DISABLED: worker_state_machine.is_active.check: 1806; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1807; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1808; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1809; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1810; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 1811; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1812; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1813; AMDGPU-DISABLED: worker_state_machine.done.barrier: 1814; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1815; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1816; AMDGPU-DISABLED: thread.user_code.check: 1817; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1818; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1819; AMDGPU-DISABLED: common.ret: 1820; AMDGPU-DISABLED-NEXT: ret void 1821; AMDGPU-DISABLED: user_code.entry: 1822; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1823; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1824; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1825; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 1826; 1827; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 1828; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 1829; NVPTX-DISABLED-NEXT: entry: 1830; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1831; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1832; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1833; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1834; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1835; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1836; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1837; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1838; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1839; NVPTX-DISABLED: is_worker_check: 1840; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1841; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1842; 
NVPTX-DISABLED: worker_state_machine.begin: 1843; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1844; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1845; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1846; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1847; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1848; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1849; NVPTX-DISABLED: worker_state_machine.finished: 1850; NVPTX-DISABLED-NEXT: ret void 1851; NVPTX-DISABLED: worker_state_machine.is_active.check: 1852; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1853; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 1854; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1855; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1856; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 1857; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 1858; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1859; NVPTX-DISABLED: worker_state_machine.done.barrier: 1860; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1861; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1862; NVPTX-DISABLED: thread.user_code.check: 1863; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1864; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1865; NVPTX-DISABLED: common.ret: 1866; 
NVPTX-DISABLED-NEXT: ret void 1867; NVPTX-DISABLED: user_code.entry: 1868; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1869; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] 1870; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1871; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 1872; 1873entry: 1874 %.zero.addr = alloca i32, align 4 1875 %.threadid_temp. = alloca i32, align 4 1876 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 1877 %exec_user_code = icmp eq i32 %0, -1 1878 br i1 %exec_user_code, label %user_code.entry, label %common.ret 1879 1880common.ret: ; preds = %entry, %user_code.entry 1881 ret void 1882 1883user_code.entry: ; preds = %entry 1884 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 1885 store i32 0, i32* %.zero.addr, align 4 1886 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 1887 call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #6 1888 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 1889 br label %common.ret 1890} 1891 1892; Function Attrs: alwaysinline convergent norecurse nounwind 1893define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { 1894; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 1895; AMDGPU-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1896; AMDGPU-NEXT: entry: 1897; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] 1898; AMDGPU-NEXT: ret void 1899; 1900; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 1901; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1902; NVPTX-NEXT: entry: 1903; NVPTX-NEXT: call void @unknown() #[[ATTR7]] 1904; NVPTX-NEXT: ret void 1905; 1906; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 1907; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1908; AMDGPU-DISABLED-NEXT: entry: 1909; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1910; AMDGPU-DISABLED-NEXT: ret void 1911; 1912; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 1913; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1914; NVPTX-DISABLED-NEXT: entry: 1915; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] 1916; NVPTX-DISABLED-NEXT: ret void 1917; 1918entry: 1919 call void @unknown() #11 1920 ret void 1921} 1922 1923; Function Attrs: alwaysinline convergent norecurse nounwind 1924define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { 1925; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 1926; AMDGPU-SAME: () 
#[[ATTR0]] { 1927; AMDGPU-NEXT: entry: 1928; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 1929; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 1930; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1931; AMDGPU-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1932; AMDGPU-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1933; AMDGPU-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1934; AMDGPU-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1935; AMDGPU-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1936; AMDGPU: is_worker_check: 1937; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1938; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1939; AMDGPU: worker_state_machine.begin: 1940; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1941; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 1942; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 1943; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 1944; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1945; AMDGPU-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 1946; AMDGPU-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 1947; AMDGPU: worker_state_machine.finished: 1948; AMDGPU-NEXT: ret void 1949; AMDGPU: worker_state_machine.is_active.check: 1950; AMDGPU-NEXT: br i1 [[WORKER_IS_ACTIVE]], label 
[[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 1951; AMDGPU: worker_state_machine.parallel_region.check: 1952; AMDGPU-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 1953; AMDGPU-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 1954; AMDGPU: worker_state_machine.parallel_region.execute: 1955; AMDGPU-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 1956; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 1957; AMDGPU: worker_state_machine.parallel_region.fallback.execute: 1958; AMDGPU-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 1959; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 1960; AMDGPU: worker_state_machine.parallel_region.end: 1961; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() 1962; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 1963; AMDGPU: worker_state_machine.done.barrier: 1964; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1965; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 1966; AMDGPU: thread.user_code.check: 1967; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1968; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 1969; AMDGPU: common.ret: 1970; AMDGPU-NEXT: ret void 1971; AMDGPU: user_code.entry: 1972; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 1973; AMDGPU-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) 
#[[ATTR3]] 1974; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 1975; AMDGPU-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1976; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP4]], i64 0) 1977; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1978; AMDGPU-NEXT: br label [[COMMON_RET]] 1979; 1980; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 1981; NVPTX-SAME: () #[[ATTR0]] { 1982; NVPTX-NEXT: entry: 1983; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 1984; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 1985; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 1986; NVPTX-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 1987; NVPTX-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 1988; NVPTX-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 1989; NVPTX-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 1990; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 1991; NVPTX: is_worker_check: 1992; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 1993; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 1994; NVPTX: worker_state_machine.begin: 1995; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 1996; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 1997; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load 
i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 1998; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 1999; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2000; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2001; NVPTX: worker_state_machine.finished: 2002; NVPTX-NEXT: ret void 2003; NVPTX: worker_state_machine.is_active.check: 2004; NVPTX-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2005; NVPTX: worker_state_machine.parallel_region.check: 2006; NVPTX-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2007; NVPTX-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2008; NVPTX: worker_state_machine.parallel_region.execute: 2009; NVPTX-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2010; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2011; NVPTX: worker_state_machine.parallel_region.fallback.execute: 2012; NVPTX-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2013; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2014; NVPTX: worker_state_machine.parallel_region.end: 2015; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() 2016; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2017; NVPTX: worker_state_machine.done.barrier: 2018; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2019; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2020; NVPTX: thread.user_code.check: 2021; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2022; NVPTX-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2023; NVPTX: common.ret: 2024; NVPTX-NEXT: ret void 2025; NVPTX: user_code.entry: 2026; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 2027; NVPTX-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] 2028; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 2029; NVPTX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2030; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP4]], i64 0) 2031; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2032; NVPTX-NEXT: br label [[COMMON_RET]] 2033; 2034; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2035; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { 2036; AMDGPU-DISABLED-NEXT: entry: 2037; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) 2038; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 2039; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 2040; AMDGPU-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2041; AMDGPU-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2042; AMDGPU-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2043; AMDGPU-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2044; AMDGPU-DISABLED-NEXT: br i1 
[[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2045; AMDGPU-DISABLED: is_worker_check: 2046; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2047; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2048; AMDGPU-DISABLED: worker_state_machine.begin: 2049; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2050; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** 2051; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) 2052; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 2053; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* 2054; AMDGPU-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2055; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2056; AMDGPU-DISABLED: worker_state_machine.finished: 2057; AMDGPU-DISABLED-NEXT: ret void 2058; AMDGPU-DISABLED: worker_state_machine.is_active.check: 2059; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2060; AMDGPU-DISABLED: worker_state_machine.parallel_region.check: 2061; AMDGPU-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2062; AMDGPU-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 
2063; AMDGPU-DISABLED: worker_state_machine.parallel_region.execute: 2064; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2065; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2066; AMDGPU-DISABLED: worker_state_machine.parallel_region.fallback.execute: 2067; AMDGPU-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2068; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2069; AMDGPU-DISABLED: worker_state_machine.parallel_region.end: 2070; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 2071; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2072; AMDGPU-DISABLED: worker_state_machine.done.barrier: 2073; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2074; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2075; AMDGPU-DISABLED: thread.user_code.check: 2076; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2077; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2078; AMDGPU-DISABLED: common.ret: 2079; AMDGPU-DISABLED-NEXT: ret void 2080; AMDGPU-DISABLED: user_code.entry: 2081; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 2082; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] 2083; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 2084; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2085; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 
1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP4]], i64 0) 2086; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2087; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] 2088; 2089; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 2090; NVPTX-DISABLED-SAME: () #[[ATTR0]] { 2091; NVPTX-DISABLED-NEXT: entry: 2092; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 2093; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 2094; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) 2095; NVPTX-DISABLED-NEXT: [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() 2096; NVPTX-DISABLED-NEXT: [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size() 2097; NVPTX-DISABLED-NEXT: [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]] 2098; NVPTX-DISABLED-NEXT: [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]] 2099; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]] 2100; NVPTX-DISABLED: is_worker_check: 2101; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 2102; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] 2103; NVPTX-DISABLED: worker_state_machine.begin: 2104; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2105; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) 2106; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 2107; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to 
void (i16, i32)* 2108; NVPTX-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null 2109; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]] 2110; NVPTX-DISABLED: worker_state_machine.finished: 2111; NVPTX-DISABLED-NEXT: ret void 2112; NVPTX-DISABLED: worker_state_machine.is_active.check: 2113; NVPTX-DISABLED-NEXT: br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]] 2114; NVPTX-DISABLED: worker_state_machine.parallel_region.check: 2115; NVPTX-DISABLED-NEXT: [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], bitcast (i8* @__omp_outlined__9_wrapper.ID to void (i16, i32)*) 2116; NVPTX-DISABLED-NEXT: br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]] 2117; NVPTX-DISABLED: worker_state_machine.parallel_region.execute: 2118; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9_wrapper(i16 0, i32 [[TMP0]]) 2119; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]] 2120; NVPTX-DISABLED: worker_state_machine.parallel_region.fallback.execute: 2121; NVPTX-DISABLED-NEXT: call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]]) 2122; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]] 2123; NVPTX-DISABLED: worker_state_machine.parallel_region.end: 2124; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() 2125; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] 2126; NVPTX-DISABLED: worker_state_machine.done.barrier: 2127; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 2128; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] 2129; NVPTX-DISABLED: thread.user_code.check: 2130; 
NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2131; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] 2132; NVPTX-DISABLED: common.ret: 2133; NVPTX-DISABLED-NEXT: ret void 2134; NVPTX-DISABLED: user_code.entry: 2135; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] 2136; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] 2137; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] 2138; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2139; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP4]], i64 0) 2140; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2141; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] 2142; 2143entry: 2144 %captured_vars_addrs = alloca [0 x i8*], align 8 2145 %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) 2146 %exec_user_code = icmp eq i32 %0, -1 2147 br i1 %exec_user_code, label %user_code.entry, label %common.ret 2148 2149common.ret: ; preds = %entry, %user_code.entry 2150 ret void 2151 2152user_code.entry: ; preds = %entry 2153 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) 2154 %2 = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @1, i32 %1, i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) 2155 %3 = bitcast i8* %2 to 
%struct.kmp_task_t_with_privates* 2156 %4 = call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 %1, i8* %2) 2157 %5 = bitcast [0 x i8*]* %captured_vars_addrs to i8** 2158 call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__9_wrapper to i8*), i8** %5, i64 0) 2159 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) 2160 br label %common.ret 2161} 2162 2163; Function Attrs: alwaysinline convergent nounwind 2164define internal void @.omp_outlined.(i32 %.global_tid., i32* noalias %.part_id., i8* noalias %.privates., void (i8*, ...)* noalias %.copy_fn., i8* %.task_t., %struct.anon* noalias %__context) #9 { 2165; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. 2166; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { 2167; AMDGPU-NEXT: entry: 2168; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] 2169; AMDGPU-NEXT: ret void 2170; 2171; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. 
2172; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { 2173; NVPTX-NEXT: entry: 2174; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] 2175; NVPTX-NEXT: ret void 2176; 2177; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. 2178; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { 2179; AMDGPU-DISABLED-NEXT: entry: 2180; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] 2181; AMDGPU-DISABLED-NEXT: ret void 2182; 2183; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. 
; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] {
; NVPTX-DISABLED-NEXT:  entry:
; NVPTX-DISABLED-NEXT:    call void @spmd_amenable() #[[ATTR6]]
; NVPTX-DISABLED-NEXT:    ret void
;
entry:
  call void @spmd_amenable() #10
  ret void
}

;; Task entry thunk for the "#pragma omp task" region: unpacks the
;; kmp_task_t payload (part-id and shareds pointers) and forwards it to
;; @.omp_outlined. above.
; Function Attrs: convergent norecurse nounwind
define internal i32 @"_omp_task_entry$"(i32 %0, %struct.kmp_task_t_with_privates* noalias %1) #3 {
entry:
  %2 = getelementptr inbounds %struct.kmp_task_t_with_privates, %struct.kmp_task_t_with_privates* %1, i32 0, i32 0
  %3 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 2
  %4 = getelementptr inbounds %struct.kmp_task_t, %struct.kmp_task_t* %2, i32 0, i32 0
  %5 = load i8*, i8** %4, align 8, !tbaa !30
  %6 = bitcast i8* %5 to %struct.anon*
  %7 = bitcast %struct.kmp_task_t_with_privates* %1 to i8*
  call void @.omp_outlined.(i32 %0, i32* %3, i8* null, void (i8*, ...)* null, i8* %7, %struct.anon* %6) #6
  ret i32 0
}

; Function Attrs: nounwind
declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #6

; Function Attrs: nounwind
declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #6

; Function Attrs: nosync nounwind
declare void @__kmpc_free_shared(i8* nocapture, i64) #8

; Function Attrs: nofree nosync nounwind
declare i8* @__kmpc_alloc_shared(i64) #7

; Function Attrs: convergent
declare void @use(i32* nocapture) #5

; Function Attrs: convergent
declare void @unknown() #2
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)

declare void @__kmpc_get_shared_variables(i8***)

; Function Attrs: alwaysinline
declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) #4

; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1

; Function Attrs: convergent
declare void @spmd_amenable() #5

; Function Attrs: nounwind
declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #6

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)


;; Outlined parallel-region body: only calls @unknown() (attribute #11,
;; convergent), so it is not spmd_amenable.
; Function Attrs: alwaysinline convergent norecurse nounwind
define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    call void @unknown() #[[ATTR7]]
; AMDGPU-NEXT:    ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    call void @unknown() #[[ATTR7]]
; NVPTX-NEXT:    ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; AMDGPU-DISABLED-NEXT:  entry:
; AMDGPU-DISABLED-NEXT:    call void @unknown() #[[ATTR7]]
; AMDGPU-DISABLED-NEXT:    ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9
; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
; NVPTX-DISABLED-NEXT:  entry:
; NVPTX-DISABLED-NEXT:    call void @unknown() #[[ATTR7]]
; NVPTX-DISABLED-NEXT:    ret void
;
entry:
  call void @unknown() #11
  ret void
}

;; Runtime-invoked wrapper: fetches the shared-variable array via
;; __kmpc_get_shared_variables and dispatches to @__omp_outlined__9.
; Function Attrs: convergent norecurse nounwind
define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 {
; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-NEXT:  entry:
; AMDGPU-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-NEXT:    ret void
;
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT:  entry:
; NVPTX-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-NEXT:    ret void
;
; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; AMDGPU-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; AMDGPU-DISABLED-NEXT:  entry:
; AMDGPU-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; AMDGPU-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; AMDGPU-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; AMDGPU-DISABLED-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; AMDGPU-DISABLED-NEXT:    ret void
;
; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper
; NVPTX-DISABLED-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED-NEXT:  entry:
; NVPTX-DISABLED-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; NVPTX-DISABLED-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
; NVPTX-DISABLED-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
; NVPTX-DISABLED-NEXT:    call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]]
; NVPTX-DISABLED-NEXT:    ret void
;
entry:
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 %1, i32* %.addr1, align 4, !tbaa !18
  store i32 0, i32* %.zero.addr, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @__omp_outlined__9(i32* %.addr1, i32* %.zero.addr) #6
  ret void
}

attributes #0 = { alwaysinline convergent norecurse nounwind }
attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
attributes #2 = { convergent }
attributes #3 = { convergent norecurse nounwind }
attributes #4 = { alwaysinline }
attributes #5 = { convergent "llvm.assume"="ompx_spmd_amenable" }
attributes #6 = { nounwind }
attributes #7 = { nofree nosync nounwind }
attributes #8 = { nosync nounwind }
attributes #9 = { alwaysinline convergent nounwind }
attributes #10 = { convergent "llvm.assume"="ompx_spmd_amenable" }
attributes #11 = { convergent }

!omp_offload.info = !{!0, !1, !2, !3, !4, !5}
!nvvm.annotations = !{!6, !7, !8, !9, !10, !11}
!llvm.module.flags = !{!12, !13, !14, !15, !16}
!llvm.ident = !{!17}

!0 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
!1 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
!2 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
!3 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
!4 = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
!5 = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
!6 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
!7 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
!8 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
!9 = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
!10 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
!11 = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
!12 = !{i32 1, !"wchar_size", i32 4}
!13 = !{i32 7, !"openmp", i32 50}
!14 = !{i32 7, !"openmp-device", i32 50}
!15 = !{i32 7, !"PIC Level", i32 2}
!16 = !{i32 7, !"frame-pointer", i32 2}
!17 = !{!"clang version 14.0.0"}
!18 = !{!19, !19, i64 0}
!19 = !{!"int", !20, i64 0}
!20 = !{!"omnipotent char", !21, i64 0}
!21 = !{!"Simple C/C++ TBAA"}
!22 = distinct !{!22, !23, !24}
!23 = !{!"llvm.loop.mustprogress"}
!24 = !{!"llvm.loop.unroll.disable"}
!25 = distinct !{!25, !23, !24}
!26 = !{!27, !27, i64 0}
!27 = !{!"any pointer", !20, i64 0}
!28 = distinct !{!28, !23, !24}
!29 = distinct !{!29, !23, !24}
!30 = !{!31, !27, i64 0}
!31 = !{!"kmp_task_t_with_privates", !32, i64 0}
!32 = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; AMDGPU: attributes #[[ATTR1]] = { convergent norecurse nounwind }
; AMDGPU: attributes #[[ATTR2]] = { alwaysinline convergent nounwind }
; AMDGPU: attributes #[[ATTR3]] = { nounwind }
; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind }
; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind }
; AMDGPU: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU: attributes #[[ATTR7]] = { convergent }
; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn }
; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { alwaysinline }
; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind }
;.
; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; NVPTX: attributes #[[ATTR1]] = { convergent norecurse nounwind }
; NVPTX: attributes #[[ATTR2]] = { alwaysinline convergent nounwind }
; NVPTX: attributes #[[ATTR3]] = { nounwind }
; NVPTX: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind }
; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind }
; NVPTX: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX: attributes #[[ATTR7]] = { convergent }
; NVPTX: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn }
; NVPTX: attributes #[[ATTR9:[0-9]+]] = { alwaysinline }
; NVPTX: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent norecurse nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR2]] = { alwaysinline convergent nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind }
; AMDGPU-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent }
; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn }
; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { alwaysinline }
; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind }
;.
; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind }
; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent norecurse nounwind }
; NVPTX-DISABLED: attributes #[[ATTR2]] = { alwaysinline convergent nounwind }
; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind }
; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind }
; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind }
; NVPTX-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" }
; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent }
; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn }
; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { alwaysinline }
; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind }
;.
; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; AMDGPU: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; AMDGPU: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; AMDGPU: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; AMDGPU: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; AMDGPU: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; AMDGPU: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; AMDGPU: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; AMDGPU: [[TBAA18]] = !{!19, !19, i64 0}
; AMDGPU: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; AMDGPU: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; AMDGPU: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; AMDGPU: [[LOOP22]] = distinct !{!22, !23, !24}
; AMDGPU: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; AMDGPU: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU: [[LOOP25]] = distinct !{!25, !23, !24}
; AMDGPU: [[TBAA26]] = !{!27, !27, i64 0}
; AMDGPU: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU: [[LOOP29]] = distinct !{!29, !23, !24}
; AMDGPU: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; AMDGPU: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; AMDGPU: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; NVPTX: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; NVPTX: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; NVPTX: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; NVPTX: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; NVPTX: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; NVPTX: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; NVPTX: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; NVPTX: [[TBAA18]] = !{!19, !19, i64 0}
; NVPTX: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; NVPTX: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; NVPTX: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; NVPTX: [[LOOP22]] = distinct !{!22, !23, !24}
; NVPTX: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; NVPTX: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; NVPTX: [[LOOP25]] = distinct !{!25, !23, !24}
; NVPTX: [[TBAA26]] = !{!27, !27, i64 0}
; NVPTX: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24}
; NVPTX: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; NVPTX: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; NVPTX: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; AMDGPU-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; AMDGPU-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; AMDGPU-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; AMDGPU-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; AMDGPU-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; AMDGPU-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; AMDGPU-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; AMDGPU-DISABLED: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; AMDGPU-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; AMDGPU-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; AMDGPU-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
; AMDGPU-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; AMDGPU-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; AMDGPU-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; AMDGPU-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
; AMDGPU-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; AMDGPU-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; AMDGPU-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
; AMDGPU-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
; AMDGPU-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; AMDGPU-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; AMDGPU-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED: [[META2:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop", i32 5, i32 0}
; NVPTX-DISABLED: [[META3:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var", i32 35, i32 2}
; NVPTX-DISABLED: [[META4:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_target", i32 65, i32 4}
; NVPTX-DISABLED: [[META5:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_shared_var_guarded", i32 50, i32 3}
; NVPTX-DISABLED: [[META6:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_l5, !"kernel", i32 1}
; NVPTX-DISABLED: [[META7:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20, !"kernel", i32 1}
; NVPTX-DISABLED: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35, !"kernel", i32 1}
; NVPTX-DISABLED: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50, !"kernel", i32 1}
; NVPTX-DISABLED: [[META10:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65, !"kernel", i32 1}
; NVPTX-DISABLED: [[META11:![0-9]+]] = !{void ()* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74, !"kernel", i32 1}
; NVPTX-DISABLED: [[META12:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
; NVPTX-DISABLED: [[META13:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; NVPTX-DISABLED: [[META14:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; NVPTX-DISABLED: [[META15:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
; NVPTX-DISABLED: [[META16:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
; NVPTX-DISABLED: [[META17:![0-9]+]] = !{!"clang version 14.0.0"}
; NVPTX-DISABLED: [[TBAA18]] = !{!19, !19, i64 0}
; NVPTX-DISABLED: [[META19:![0-9]+]] = !{!"int", !20, i64 0}
; NVPTX-DISABLED: [[META20:![0-9]+]] = !{!"omnipotent char", !21, i64 0}
; NVPTX-DISABLED: [[META21:![0-9]+]] = !{!"Simple C/C++ TBAA"}
; NVPTX-DISABLED: [[LOOP22]] = distinct !{!22, !23, !24}
; NVPTX-DISABLED: [[META23:![0-9]+]] = !{!"llvm.loop.mustprogress"}
; NVPTX-DISABLED: [[META24:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; NVPTX-DISABLED: [[LOOP25]] = distinct !{!25, !23, !24}
; NVPTX-DISABLED: [[TBAA26]] = !{!27, !27, i64 0}
; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
; NVPTX-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
; NVPTX-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
; NVPTX-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.