; RUN: opt -S -passes=openmp-opt -openmp-ir-builder-optimistic-attributes -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s
; RUN: opt -S -passes=openmp-opt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s

; C input used for this test:

; void bar(void) {
;   #pragma omp parallel
;   { }
; }
; void foo(void) {
;   #pragma omp target teams
;   {
;     #pragma omp parallel
;     {}
;     bar();
;     unknown();
;     #pragma omp parallel
;     {}
;   }
; }

; Verify we replace the function pointer uses for the first and last outlined
; region (1 and 2) but not for the one in bar (3) because bar could be called
; from another kernel.

; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef
; CHECK-DAG: @__omp_outlined__2_wrapper.ID = private constant i8 undef

; CHECK-DAG: icmp eq void (i16, i32)* %worker.work_fn.addr_cast, bitcast (i8* @__omp_outlined__1_wrapper.ID to void (i16, i32)*)
; CHECK-DAG: icmp eq void (i16, i32)* %worker.work_fn.addr_cast, bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*)

; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** %{{.*}}, i64 0)
; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** %{{.*}}, i64 0)
; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %{{.*}}, i64 0)

%struct.ident_t = type { i32, i32, i32, i32, i8* }

@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@__omp_offloading_10301_87b2c_foo_l7_exec_mode = weak constant i8 1
@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_10301_87b2c_foo_l7_exec_mode], section "llvm.metadata"

define weak void @__omp_offloading_10301_87b2c_foo_l7() {
entry:
  %.zero.addr = alloca i32, align 4
  %.threadid_temp. = alloca i32, align 4
  store i32 0, i32* %.zero.addr, align 4
  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true)
  %exec_user_code = icmp eq i32 %0, -1
  br i1 %exec_user_code, label %user_code.entry, label %worker.exit

user_code.entry:                                  ; preds = %entry
  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
  store i32 %1, i32* %.threadid_temp., align 4
  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr)
  call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true)
  ret void

worker.exit:                                      ; preds = %entry
  ret void
}

declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1)
declare void @unknown()
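
; Outlined body of the "target teams" region in foo(). The two
; __kmpc_parallel_51 calls below (regions 1 and 2) are only reachable from the
; kernel above, so their wrapper function pointers can be replaced with the
; .ID globals.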
define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
entry:
  %.global_tid..addr = alloca i32*, align 8
  %.bound_tid..addr = alloca i32*, align 8
  %captured_vars_addrs = alloca [0 x i8*], align 8
  %captured_vars_addrs1 = alloca [0 x i8*], align 8
  store i32* %.global_tid., i32** %.global_tid..addr, align 8
  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
  %0 = load i32*, i32** %.global_tid..addr, align 8
  %1 = load i32, i32* %0, align 4
  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %2, i64 0)
  call void @bar()
  call void @unknown()
  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %3, i64 0)
  ret void
}

define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
entry:
  %.global_tid..addr = alloca i32*, align 8
  %.bound_tid..addr = alloca i32*, align 8
  store i32* %.global_tid., i32** %.global_tid..addr, align 8
  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
  ret void
}

define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 0, i32* %.zero.addr, align 4
  store i16 %0, i16* %.addr, align 2
  store i32 %1, i32* %.addr1, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr)
  ret void
}

declare void @__kmpc_get_shared_variables(i8***)

declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
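
; bar() is externally visible within the device image (hidden visibility,
; external linkage), so its parallel region (3) could be reached from another
; kernel and its wrapper function pointer is left untouched.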
define hidden void @bar() {
entry:
  %captured_vars_addrs = alloca [0 x i8*], align 8
  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %1, i64 0)
  ret void
}

define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
entry:
  %.global_tid..addr = alloca i32*, align 8
  %.bound_tid..addr = alloca i32*, align 8
  store i32* %.global_tid., i32** %.global_tid..addr, align 8
  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
  ret void
}

define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 0, i32* %.zero.addr, align 4
  store i16 %0, i16* %.addr, align 2
  store i32 %1, i32* %.addr1, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr)
  ret void
}

declare i32 @__kmpc_global_thread_num(%struct.ident_t*)

declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1)

define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) {
entry:
  %.global_tid..addr = alloca i32*, align 8
  %.bound_tid..addr = alloca i32*, align 8
  store i32* %.global_tid., i32** %.global_tid..addr, align 8
  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
  ret void
}

define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) {
entry:
  %.addr = alloca i16, align 2
  %.addr1 = alloca i32, align 4
  %.zero.addr = alloca i32, align 4
  %global_args = alloca i8**, align 8
  store i32 0, i32* %.zero.addr, align 4
  store i16 %0, i16* %.addr, align 2
  store i32 %1, i32* %.addr1, align 4
  call void @__kmpc_get_shared_variables(i8*** %global_args)
  call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr)
  ret void
}

!omp_offload.info = !{!0}
!nvvm.annotations = !{!1}
!llvm.module.flags = !{!2, !3}

!0 = !{i32 0, i32 66305, i32 555956, !"foo", i32 7, i32 0}
!1 = !{void ()* @__omp_offloading_10301_87b2c_foo_l7, !"kernel", i32 1}
!2 = !{i32 7, !"openmp", i32 50}
!3 = !{i32 7, !"openmp-device", i32 50}