1 // Test host codegen. 2 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 3 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 4 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 5 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 6 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 7 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 8 9 // Test target codegen - host bc file has to be created first. 
10 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc 11 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 12 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s 13 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 14 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc 15 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 16 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s 17 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 18 19 // expected-no-diagnostics 20 
#ifndef HEADER 21 #define HEADER 22 23 // CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } 24 // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" 25 // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } 26 27 // CHECK-DAG: [[S1:%.+]] = type { double } 28 // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } 29 // CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } 30 // CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } 31 32 // TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } 33 34 // We have 6 target regions 35 36 // CHECK-DAG: @{{.*}} = private constant i8 0 37 // CHECK-DAG: @{{.*}} = private constant i8 0 38 // CHECK-DAG: @{{.*}} = private constant i8 0 39 // CHECK-DAG: @{{.*}} = private constant i8 0 40 // CHECK-DAG: @{{.*}} = private constant i8 0 41 // CHECK-DAG: @{{.*}} = private constant i8 0 42 43 // TCHECK: @{{.+}} = constant [[ENTTY]] 44 // TCHECK: @{{.+}} = constant [[ENTTY]] 45 // TCHECK: @{{.+}} = constant [[ENTTY]] 46 // TCHECK: @{{.+}} = constant [[ENTTY]] 47 // TCHECK: @{{.+}} = constant [[ENTTY]] 48 // TCHECK: @{{.+}} = constant [[ENTTY]] 49 50 // Check if offloading descriptor is created. 
// The directives below match, in order: the two external entry-table boundary
// symbols, the two external i8 boundary symbols, the one-element array that
// bundles all four, and the descriptor constant that points at that array.
// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
// CHECK: [[DEVBEGIN:@.+]] = external constant i8
// CHECK: [[DEVEND:@.+]] = external constant i8
// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }

// Check that the target registration function is registered as a Ctor.
// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]


// Function template holding two `target parallel` regions:
//  - the first has an empty body and takes its num_threads value from the
//    constant expression tx(20);
//  - the second takes its num_threads value from the local `short b` and
//    adds b into a.
// bar() below instantiates it with tx = int. The parameter n is unused.
template<typename tx>
tx ftemplate(int n) {
  tx a = 0;

  // Empty region; thread count is a compile-time constant.
  #pragma omp target parallel num_threads(tx(20))
  {
  }

  short b = 1;
  // Thread count comes from a 16-bit variable that is also used in the body.
  #pragma omp target parallel num_threads(b)
  {
    a += b;
  }

  return a;
}

// Static function holding two empty `target parallel` regions whose
// num_threads values depend on the argument: n itself, then 32+n.
// Returns n+1.
static
int fstatic(int n) {

  // Thread count is the parameter itself.
  #pragma omp target parallel num_threads(n)
  {
  }

  // Thread count is an expression over the parameter.
  #pragma omp target parallel num_threads(32+n)
  {
  }

  return n+1;
}

// Struct with a single double member. Its member function r1 holds two
// `target parallel` regions that write to this->a, so both regions reference
// the enclosing object.
struct S1 {
  double a;

  // Runs the two regions in sequence and returns the final value of a
  // truncated to int.
  int r1(int n){
    int b = 1;

    // Thread count is n-b; the body also reads b.
    #pragma omp target parallel num_threads(n-b)
    {
      this->a = (double)b + 1.5;
    }

    // Thread count is the literal 1024; the body touches only this->a.
    #pragma omp target parallel num_threads(1024)
    {
      this->a = 2.5;
    }

    return (int)a;
  }
};

// Driver: calls S1::r1, fstatic and ftemplate<int> with the same n and sums
// their results. The directives interleaved with the calls capture each
// callee's symbol name (FS1, FSTATIC, FTEMPLATE) so that later directives can
// match the corresponding function definitions.
// CHECK: define {{.*}}@{{.*}}bar{{.*}}
int bar(int n){
  int a = 0;

  S1 S;
  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
  a += S.r1(n);

  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
  a += fstatic(n);

  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
  a += ftemplate<int>(n);

  return a;
}



//
// CHECK: define {{.*}}[[FS1]]([[S1]]* {{%.+}}, i32 
{{[^%]*}}[[PARM:%.+]]) 134 // 135 // CHECK-DAG: store i32 [[PARM]], i32* [[N_ADDR:%.+]], align 136 // CHECK: store i32 1, i32* [[B:%.+]], align 137 // CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align 138 // CHECK: [[BV:%.+]] = load i32, i32* [[B]], align 139 // CHECK: [[SUB:%.+]] = sub nsw i32 [[NV]], [[BV]] 140 // CHECK: store i32 [[SUB]], i32* [[CAPE_ADDR:%.+]], align 141 // CHECK: [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align 142 // CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32* 143 // CHECK-64: store i32 [[CEV]], i32* [[CONV]], align 144 // CHECK-32: store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align 145 // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align 146 // CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align 147 // 148 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) 149 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 150 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 151 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 152 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] 153 // 154 // CHECK: [[FAIL]] 155 // CHECK: call void [[HVT1:@.+]]([[S1]]* {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]]) 156 // CHECK: br label {{%?}}[[END]] 157 // CHECK: [[END]] 158 // 159 // 160 // 161 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024) 162 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 163 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 164 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 165 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] 166 // 167 // CHECK: [[FAIL]] 168 // CHECK: call void [[HVT2:@.+]]([[S1]]* {{[^,]+}}) 169 // CHECK: br label {{%?}}[[END]] 170 // CHECK: [[END]] 171 // 172 173 174 175 176 177 178 // 179 // CHECK: define {{.*}}[[FSTATIC]](i32 {{[^%]*}}[[PARM:%.+]]) 180 // 181 // CHECK-DAG: store i32 
[[PARM]], i32* [[N_ADDR:%.+]], align 182 // CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align 183 // CHECK: store i32 [[NV]], i32* [[CAPE_ADDR:%.+]], align 184 // CHECK: [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align 185 // CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32* 186 // CHECK-64: store i32 [[CEV]], i32* [[CONV]], align 187 // CHECK-32: store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align 188 // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align 189 // CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align 190 // 191 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) 192 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 193 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 194 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 195 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] 196 // 197 // CHECK: [[FAIL]] 198 // CHECK: call void [[HVT3:@.+]](i[[SZ]] [[ARG]]) 199 // CHECK: br label {{%?}}[[END]] 200 // CHECK: [[END]] 201 // 202 // 203 // 204 // CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align 205 // CHECK: [[ADD:%.+]] = add nsw i32 32, [[NV]] 206 // CHECK: store i32 [[ADD]], i32* [[CAPE_ADDR:%.+]], align 207 // CHECK: [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align 208 // CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32* 209 // CHECK-64: store i32 [[CEV]], i32* [[CONV]], align 210 // CHECK-32: store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align 211 // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align 212 // CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align 213 // 214 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) 215 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 216 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 217 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 218 // CHECK: br 
i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] 219 // 220 // CHECK: [[FAIL]] 221 // CHECK: call void [[HVT4:@.+]](i[[SZ]] [[ARG]]) 222 // CHECK: br label {{%?}}[[END]] 223 // CHECK: [[END]] 224 // 225 226 227 228 229 230 231 // 232 // CHECK: define {{.*}}[[FTEMPLATE]] 233 // 234 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20) 235 // CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align 236 // CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align 237 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 238 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] 239 // 240 // CHECK: [[FAIL]] 241 // CHECK: call void [[HVT5:@.+]]() 242 // CHECK: br label {{%?}}[[END]] 243 // 244 // CHECK: [[END]] 245 // 246 // 247 // 248 // CHECK: store i16 1, i16* [[B:%.+]], align 249 // CHECK: [[BV:%.+]] = load i16, i16* [[B]], align 250 // CHECK: store i16 [[BV]], i16* [[CAPE_ADDR:%.+]], align 251 // CHECK: [[CEV:%.+]] = load i16, i16* [[CAPE_ADDR]], align 252 // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i16* 253 // CHECK: store i16 [[CEV]], i16* [[CONV]], align 254 // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align 255 // CHECK: [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align 256 // CHECK: [[THREADS:%.+]] = sext i16 [[T]] to i32 257 // 258 // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) 259 // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 260 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 261 // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 262 // CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] 263 // 264 // CHECK: [[FAIL]] 265 // CHECK: call void [[HVT6:@.+]](i[[SZ]] {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]]) 266 // CHECK: br label {{%?}}[[END]] 267 // CHECK: [[END]] 268 // 269 270 271 272 273 274 275 // Check that the offloading functions are emitted 
and that the parallel function 276 // is appropriately guarded. 277 278 // CHECK: define internal void [[HVT1]]([[S1]]* {{%.+}}, i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]]) 279 // CHECK-DAG: store i[[SZ]] [[PARM2]], i[[SZ]]* [[CAPE_ADDR:%.+]], align 280 // CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32* 281 // CHECK-64: [[NT:%.+]] = load i32, i32* [[CONV]], align 282 // CHECK-32: [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align 283 // CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) 284 // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, 285 // 286 // 287 288 289 // CHECK: define internal void [[HVT2]]([[S1]]* {{%.+}}) 290 // CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1024) 291 // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 1, 292 // 293 // 294 295 296 297 298 299 300 301 302 // CHECK: define internal void [[HVT3]](i[[SZ]] [[PARM:%.+]]) 303 // CHECK-DAG: store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align 304 // CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32* 305 // CHECK-64: [[NT:%.+]] = load i32, i32* [[CONV]], align 306 // CHECK-32: [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align 307 // CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) 308 // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, 309 // 310 // 311 // CHECK: define internal void [[HVT4]](i[[SZ]] [[PARM:%.+]]) 312 // CHECK-DAG: store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align 313 // CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32* 314 // CHECK-64: [[NT:%.+]] = load i32, i32* [[CONV]], align 315 // CHECK-32: [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align 316 // CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) 317 // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, 318 // 319 // 320 321 322 323 324 325 // CHECK: define internal void [[HVT5]]( 326 // CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20) 327 // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, 328 // 329 // 330 331 332 // CHECK: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) 333 // CHECK-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align 334 // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* 335 // CHECK: [[T:%.+]] = load i16, i16* [[CONV]], align 336 // CHECK: [[NT:%.+]] = sext i16 [[T]] to i32 337 // CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) 338 // CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, 339 // 340 // 341 342 343 344 #endif 345