// expected-no-diagnostics

// Codegen test for '#pragma omp declare variant' with 'match(construct=...)'
// selectors. The file holds four independent sections (CK1..CK4). Each section
// is enabled by its own -DCKn macro on the lit command lines that precede it
// and is verified with the FileCheck prefix of the same name. The -fopenmp-simd
// command lines use no prefix and only require that no __kmpc/__tgt runtime
// symbol appears in the output.
//
// Everything is wrapped in the HEADER guard because the same file is also
// compiled to a PCH and then included again by the -include-pch command lines.

#ifndef HEADER
#define HEADER

// RUN: %clang_cc1 -DCK1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1

// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK1

// Section CK1. The base function vxv declares two variants keyed on the
// enclosing construct: t_vxv for 'target' and p_vxv for 'parallel'. The checks
// expect the call inside the 'target teams' region to reach t_vxv, the call
// inside the 'parallel' region to reach p_vxv, and the call made outside any
// construct to stay on vxv.

#define N 100

void p_vxv(int *v1, int *v2, int *v3, int n);
void t_vxv(int *v1, int *v2, int *v3, int n);

#pragma omp declare variant(t_vxv) match(construct={target})
#pragma omp declare variant(p_vxv) match(construct={parallel})
void vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i];
}
// CK1: define dso_local void @vxv

void p_vxv(int *v1, int *v2, int *v3, int n) {
  #pragma omp for
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3;
}
// CK1: define dso_local void @p_vxv

#pragma omp declare target
void t_vxv(int *v1, int *v2, int *v3, int n) {
  #pragma omp distribute simd
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2;
}
#pragma omp end declare target
// CK1: define dso_local void @t_vxv


// CK1-LABEL: define {{[^@]+}}@test
int test() {
  int v1[N], v2[N], v3[N];

  // init
  for (int i = 0; i < N; i++) {
    v1[i] = (i + 1);
    v2[i] = -(i + 1);
    v3[i] = 0;
  }

  #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
  {
    vxv(v1, v2, v3, N);
  }
  // CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}})

  vxv(v1, v2, v3, N);
  // CK1: call void @vxv

  #pragma omp parallel
  {
    vxv(v1, v2, v3, N);
  }
  // CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void

  return 0;
}

// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}})
// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void
// CK1: define internal void [[TARGET_REGION]](
// CK1: call void @t_vxv

// CK1: define internal void [[PARALLEL_REGION]](
// CK1: call void @p_vxv
#endif // CK1

// RUN: %clang_cc1 -DCK2 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK2

// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK2

// Section CK2. test_base declares one variant each for the 'teams', 'target'
// and 'parallel' constructs. The checks expect test_teams to be called from
// the outlined teams function of the 'target' + 'teams' region, test_target
// from the offload function of the plain 'target' region, and test_parallel
// from the outlined function of the 'parallel' region.

void test_teams(int ***v1, int ***v2, int ***v3, int n);
void test_target(int ***v1, int ***v2, int ***v3, int n);
void test_parallel(int ***v1, int ***v2, int ***v3, int n);

#pragma omp declare variant(test_teams) match(construct = {teams})
#pragma omp declare variant(test_target) match(construct = {target})
#pragma omp declare variant(test_parallel) match(construct = {parallel})
void test_base(int ***v1, int ***v2, int ***v3, int n) {
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}

#pragma omp declare target
void test_teams(int ***v1, int ***v2, int ***v3, int n) {
  #pragma omp distribute parallel for simd collapse(2)
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}
#pragma omp end declare target

#pragma omp declare target
void test_target(int ***v1, int ***v2, int ***v3, int n) {
  #pragma omp parallel for simd collapse(3)
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}
#pragma omp end declare target

void test_parallel(int ***v1, int ***v2, int ***v3, int n) {
  #pragma omp for collapse(3)
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}

// CK2-LABEL: define {{[^@]+}}@test
void test(int ***v1, int ***v2, int ***v3, int n) {
  int i;

  #pragma omp target
  #pragma omp teams
  {
    test_base(v1, v2, v3, 0);
  }
  // CK2: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})

  #pragma omp target
  {
    test_base(v1, v2, v3, 0);
  }
  // CK2: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})

  #pragma omp parallel
  {
    test_base(v1, v2, v3, 0);
  }
  // CK2: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[PARALLEL_REGION:@.+]] to void
}

// CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}})
// CK2: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[TARGET_REGION_1:@.+]] to void
// CK2: define internal void [[TARGET_REGION_1]](
// CK2: call void @test_teams

// CK2: define internal void @__omp_offloading_[[OFFLOAD_2]]({{.+}})
// CK2: call void @test_target

// CK2: define internal void [[PARALLEL_REGION]](
// CK2: call void @test_parallel

#endif // CK2

// RUN: %clang_cc1 -DCK3 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK3

// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK3

// Section CK3. The base function t declares variants for the 'simd' and 'for'
// constructs. Inside test the checks expect, in this order, a call to t_simd
// (the loop under 'omp simd') and then a call to t_for (the loop under
// 'omp for').

#define N 100

int t_for(int *v1, int *v2, int *v3, int n);
int t_simd(int *v1, int *v2, int *v3, int n);

#pragma omp declare variant(t_simd) match(construct = {simd})
#pragma omp declare variant(t_for) match(construct = {for})
int t(int *v1, int *v2, int *v3, int idx) {
  return v1[idx] * v2[idx];
}

int t_for(int *v1, int *v2, int *v3, int idx) {
  return v1[idx] * v2[idx];
}

#pragma omp declare simd
int t_simd(int *v1, int *v2, int *v3, int idx) {
  return v1[idx] * v2[idx];
}

// CK3-LABEL: define {{[^@]+}}@test
void test() {
  int v1[N], v2[N], v3[N];

  // init
  for (int i = 0; i < N; i++) {
    v1[i] = (i + 1);
    v2[i] = -(i + 1);
    v3[i] = 0;
  }

  #pragma omp simd
  for (int i = 0; i < N; i++) {
    v3[i] = t(v1, v2, v3, i);
  }
  // CK3: call = call i32 @t_simd


  #pragma omp for
  for (int i = 0; i < N; i++) {
    v3[i] = t(v1, v2, v3, i);
  }
  // CK3: call{{.+}} = call i32 @t_for
}

#endif // CK3

// RUN: %clang_cc1 -DCK4 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK4

// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK4

// Section CK4. vxv declares three variants whose selectors list several
// constructs at once. Inside test the checks expect two offloading calls with
// a direct call to the base vxv between them (the loop under plain 'omp simd').
// After test, calls to all_vxv and combined_vxv must both appear, in either
// order.

#define N 100

void not_selected_vxv(int *v1, int *v2, int *v3, int n);
void combined_vxv(int *v1, int *v2, int *v3, int n);
void all_vxv(int *v1, int *v2, int *v3, int n);

#pragma omp declare variant(all_vxv) match(construct={target,teams,parallel,for,simd})
#pragma omp declare variant(combined_vxv) match(construct={target,teams,parallel,for})
#pragma omp declare variant(not_selected_vxv) match(construct={parallel,for})
void vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i];
}

void not_selected_vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3;
}

#pragma omp declare target
void combined_vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2;
}
#pragma omp end declare target

#pragma omp declare target
void all_vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 4;
}
#pragma omp end declare target

// CK4-LABEL: define {{[^@]+}}@test
void test() {
  int v1[N], v2[N], v3[N];

  //init
  for (int i = 0; i < N; i++) {
    v1[i] = (i + 1);
    v2[i] = -(i + 1);
    v3[i] = 0;
  }

  #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
  {
    #pragma omp parallel for
    for (int i = 0; i < N; i++)
      vxv(v1, v2, v3, N);
  }
  // CK4: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})

  #pragma omp simd
  for (int i = 0; i < N; i++)
    vxv(v1, v2, v3, N);
  // CK4: call void @vxv

  #pragma omp target teams distribute parallel for simd map(from: v3[:N])
  for (int i = 0; i < N; i++)
    for (int i = 0; i < N; i++)
      for (int i = 0; i < N; i++)
        vxv(v1, v2, v3, N);
  // CK4: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})
}
// CK4-DAG: call void @all_vxv
// CK4-DAG: call void @combined_vxv

#endif // CK4

#endif // HEADER