1 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s 2 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s 3 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s 4 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG 5 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 6 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -fopenmp-version=50 -DOMP5 7 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 8 9 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s 10 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s 11 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s 12 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix=TERM_DEBUG %s 13 // RUN: %clang_cc1 -verify 
-fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 14 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -fopenmp-version=50 -DOMP5 15 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=50 -DOMP5 | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50 16 // expected-no-diagnostics 17 #ifndef HEADER 18 #define HEADER 19 20 // CHECK: [[SS_TY:%.+]] = type { i32 } 21 22 long long get_val() { return 0; } 23 double *g_ptr; 24 25 struct S { 26 int a, b; 27 }; 28 29 // CHECK-LABEL: define {{.*void}} @{{.*}}simple{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) 30 void simple(float *a, float *b, float *c, float *d) { 31 S s, *p; 32 #ifdef OMP5 33 #pragma omp simd if (simd: true) nontemporal(a, b, c, d, s) 34 #else 35 #pragma omp simd 36 #endif 37 // CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]] 38 39 // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group 40 // CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], 6 41 // CHECK-NEXT: br i1 [[CMP]], label %[[SIMPLE_LOOP1_BODY:.+]], label %[[SIMPLE_LOOP1_END:[^,]+]] 42 for (int i = 3; i < 32; i += 5) { 43 // CHECK: [[SIMPLE_LOOP1_BODY]]: 44 // Start of body: calculate i from IV: 45 // CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group 46 // CHECK: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 5 47 // CHECK-NEXT: [[CALC_I_2:%.+]] = add nsw i32 3, [[CALC_I_1]] 48 // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]{{.*}}!llvm.access.group 49 // ... loop body ... 
50 // End of body: store into a[i]: 51 // OMP45-NOT: load float*,{{.*}}!nontemporal 52 // CHECK-NOT: load float,{{.*}}!nontemporal 53 // OMP50: load float*,{{.*}}!nontemporal 54 // OMP50: load float*,{{.*}}!nontemporal 55 // OMP50: load float*,{{.*}}!nontemporal 56 // OMP50: load i32,{{.*}}!nontemporal 57 // OMP50-NOT: load i32,{{.*}}!nontemporal 58 // OMP50: load float*,{{.*}}!nontemporal 59 // CHECK-NOT: load float,{{.*}}!nontemporal 60 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}{{.*}}!llvm.access.group 61 a[i] = b[i] * c[i] * d[i] + s.a + p->a; 62 // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group 63 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1 64 // CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group 65 // br label %{{.+}}, !llvm.loop !{{.+}} 66 } 67 // CHECK: [[SIMPLE_LOOP1_END]]: 68 69 long long k = get_val(); 70 71 #pragma omp simd linear(k : 3) 72 // CHECK: [[K0:%.+]] = call {{.*}}i64 @{{.*}}get_val 73 // CHECK-NEXT: store i64 [[K0]], i64* [[K_VAR:%[^,]+]] 74 // CHECK: store i32 0, i32* [[OMP_IV2:%[^,]+]] 75 // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR]] 76 // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] 77 78 // CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group 79 // CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV2]], 9 80 // CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP2_BODY:.+]], label %[[SIMPLE_LOOP2_END:[^,]+]] 81 for (int i = 10; i > 1; i--) { 82 // CHECK: [[SIMPLE_LOOP2_BODY]]: 83 // Start of body: calculate i from IV: 84 // CHECK: [[IV2_0:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group 85 // FIXME: It is interesting, why the following "mul 1" was not constant folded? 
86 // CHECK-NEXT: [[IV2_1:%.+]] = mul nsw i32 [[IV2_0]], 1 87 // CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV2_1]] 88 // CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group 89 // 90 // CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group 91 // CHECK-NEXT: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group 92 // CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV2_2]], 3 93 // CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64 94 // CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]] 95 // Update of the privatized version of linear variable! 96 // CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]] 97 a[k]++; 98 k = k + 3; 99 // CHECK: [[IV2_2:%.+]] = load i32, i32* [[OMP_IV2]]{{.*}}!llvm.access.group 100 // CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV2_2]], 1 101 // CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV2]]{{.*}}!llvm.access.group 102 // br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP2_ID]] 103 } 104 // CHECK: [[SIMPLE_LOOP2_END]]: 105 // 106 // Update linear vars after loop, as the loop was operating on a private version. 107 // CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]] 108 // CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27 109 // CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_VAR]] 110 // 111 112 int lin = 12; 113 #pragma omp simd linear(lin : get_val()), linear(g_ptr) 114 115 // Init linear private var. 116 // CHECK: store i32 12, i32* [[LIN_VAR:%[^,]+]] 117 // CHECK: store i64 0, i64* [[OMP_IV3:%[^,]+]] 118 119 // CHECK: [[LIN_LOAD:%.+]] = load i32, i32* [[LIN_VAR]] 120 // CHECK-NEXT: store i32 [[LIN_LOAD]], i32* [[LIN_START:%[^,]+]] 121 // Remember linear step. 
122 // CHECK: [[CALL_VAL:%.+]] = invoke 123 // CHECK: store i64 [[CALL_VAL]], i64* [[LIN_STEP:%[^,]+]] 124 125 // CHECK: [[GLIN_LOAD:%.+]] = load double*, double** [[GLIN_VAR:@[^,]+]] 126 // CHECK-NEXT: store double* [[GLIN_LOAD]], double** [[GLIN_START:%[^,]+]] 127 128 // CHECK: [[IV3:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group 129 // CHECK-NEXT: [[CMP3:%.+]] = icmp ult i64 [[IV3]], 4 130 // CHECK-NEXT: br i1 [[CMP3]], label %[[SIMPLE_LOOP3_BODY:.+]], label %[[SIMPLE_LOOP3_END:[^,]+]] 131 for (unsigned long long it = 2000; it >= 600; it-=400) { 132 // CHECK: [[SIMPLE_LOOP3_BODY]]: 133 // Start of body: calculate it from IV: 134 // CHECK: [[IV3_0:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group 135 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul i64 [[IV3_0]], 400 136 // CHECK-NEXT: [[LC_IT_2:%.+]] = sub i64 2000, [[LC_IT_1]] 137 // CHECK-NEXT: store i64 [[LC_IT_2]], i64* {{.+}}, !llvm.access.group 138 // 139 // Linear start and step are used to calculate current value of the linear variable. 
140 // CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]]{{.*}}!llvm.access.group 141 // CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]]{{.*}}!llvm.access.group 142 // CHECK-NOT: store i32 {{.+}}, i32* [[LIN_VAR]],{{.*}}!llvm.access.group 143 // CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]]{{.*}}!llvm.access.group 144 // CHECK-NEXT: [[IV3_1:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group 145 // CHECK-NEXT: [[MUL:%.+]] = mul i64 [[IV3_1]], 1 146 // CHECK: [[GEP:%.+]] = getelementptr{{.*}}[[GLINSTART]] 147 // CHECK-NEXT: store double* [[GEP]], double** [[G_PTR_CUR:%[^,]+]]{{.*}}!llvm.access.group 148 *g_ptr++ = 0.0; 149 // CHECK: [[GEP_VAL:%.+]] = load double{{.*}}[[G_PTR_CUR]]{{.*}}!llvm.access.group 150 // CHECK: store double{{.*}}[[GEP_VAL]]{{.*}}!llvm.access.group 151 a[it + lin]++; 152 // CHECK: [[FLT_INC:%.+]] = fadd float 153 // CHECK-NEXT: store float [[FLT_INC]],{{.*}}!llvm.access.group 154 // CHECK: [[IV3_2:%.+]] = load i64, i64* [[OMP_IV3]]{{.*}}!llvm.access.group 155 // CHECK-NEXT: [[ADD3_2:%.+]] = add i64 [[IV3_2]], 1 156 // CHECK-NEXT: store i64 [[ADD3_2]], i64* [[OMP_IV3]]{{.*}}!llvm.access.group 157 } 158 // CHECK: [[SIMPLE_LOOP3_END]]: 159 // 160 // Linear start and step are used to calculate final value of the linear variables. 
161 // CHECK: [[LINSTART:.+]] = load i32, i32* [[LIN_START]] 162 // CHECK: [[LINSTEP:.+]] = load i64, i64* [[LIN_STEP]] 163 // CHECK: store i32 {{.+}}, i32* [[LIN_VAR]], 164 // CHECK: [[GLINSTART:.+]] = load double*, double** [[GLIN_START]] 165 // CHECK: store double* {{.*}}[[GLIN_VAR]] 166 167 #pragma omp simd 168 // CHECK: store i32 0, i32* [[OMP_IV4:%[^,]+]] 169 170 // CHECK: [[IV4:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.access.group 171 // CHECK-NEXT: [[CMP4:%.+]] = icmp slt i32 [[IV4]], 4 172 // CHECK-NEXT: br i1 [[CMP4]], label %[[SIMPLE_LOOP4_BODY:.+]], label %[[SIMPLE_LOOP4_END:[^,]+]] 173 for (short it = 6; it <= 20; it-=-4) { 174 // CHECK: [[SIMPLE_LOOP4_BODY]]: 175 // Start of body: calculate it from IV: 176 // CHECK: [[IV4_0:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.access.group 177 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV4_0]], 4 178 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 6, [[LC_IT_1]] 179 // CHECK-NEXT: [[LC_IT_3:%.+]] = trunc i32 [[LC_IT_2]] to i16 180 // CHECK-NEXT: store i16 [[LC_IT_3]], i16* {{.+}}, !llvm.access.group 181 182 // CHECK: [[IV4_2:%.+]] = load i32, i32* [[OMP_IV4]]{{.*}}!llvm.access.group 183 // CHECK-NEXT: [[ADD4_2:%.+]] = add nsw i32 [[IV4_2]], 1 184 // CHECK-NEXT: store i32 [[ADD4_2]], i32* [[OMP_IV4]]{{.*}}!llvm.access.group 185 } 186 // CHECK: [[SIMPLE_LOOP4_END]]: 187 188 #pragma omp simd 189 // CHECK: store i32 0, i32* [[OMP_IV5:%[^,]+]] 190 191 // CHECK: [[IV5:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.access.group 192 // CHECK-NEXT: [[CMP5:%.+]] = icmp slt i32 [[IV5]], 26 193 // CHECK-NEXT: br i1 [[CMP5]], label %[[SIMPLE_LOOP5_BODY:.+]], label %[[SIMPLE_LOOP5_END:[^,]+]] 194 for (unsigned char it = 'z'; it >= 'a'; it+=-1) { 195 // CHECK: [[SIMPLE_LOOP5_BODY]]: 196 // Start of body: calculate it from IV: 197 // CHECK: [[IV5_0:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.access.group 198 // CHECK-NEXT: [[IV5_1:%.+]] = mul nsw i32 [[IV5_0]], 1 199 // CHECK-NEXT: [[LC_IT_1:%.+]] = sub nsw i32 122, 
[[IV5_1]] 200 // CHECK-NEXT: [[LC_IT_2:%.+]] = trunc i32 [[LC_IT_1]] to i8 201 // CHECK-NEXT: store i8 [[LC_IT_2]], i8* {{.+}}, !llvm.access.group 202 203 // CHECK: [[IV5_2:%.+]] = load i32, i32* [[OMP_IV5]]{{.*}}!llvm.access.group 204 // CHECK-NEXT: [[ADD5_2:%.+]] = add nsw i32 [[IV5_2]], 1 205 // CHECK-NEXT: store i32 [[ADD5_2]], i32* [[OMP_IV5]]{{.*}}!llvm.access.group 206 } 207 // CHECK: [[SIMPLE_LOOP5_END]]: 208 209 // CHECK-NOT: mul i32 %{{.+}}, 10 210 #pragma omp simd 211 for (unsigned i=100; i<10; i+=10) { 212 } 213 214 int A; 215 // CHECK: store i32 -1, i32* [[A:%.+]], 216 A = -1; 217 #pragma omp simd lastprivate(A) 218 // CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]] 219 // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]] 220 // CHECK: [[SIMD_LOOP7_COND]]: 221 // CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group 222 // CHECK-NEXT: [[CMP7:%.+]] = icmp slt i64 [[IV7]], 7 223 // CHECK-NEXT: br i1 [[CMP7]], label %[[SIMPLE_LOOP7_BODY:.+]], label %[[SIMPLE_LOOP7_END:[^,]+]] 224 for (long long i = -10; i < 10; i += 3) { 225 // CHECK: [[SIMPLE_LOOP7_BODY]]: 226 // Start of body: calculate i from IV: 227 // CHECK: [[IV7_0:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group 228 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV7_0]], 3 229 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] 230 // CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.access.group 231 // CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.access.group 232 // CHECK-NEXT: [[CONV:%.+]] = trunc i64 [[LC_VAL]] to i32 233 // CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV:%[^,]+]],{{.+}}!llvm.access.group 234 A = i; 235 // CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group 236 // CHECK-NEXT: [[ADD7_2:%.+]] = add nsw i64 [[IV7_2]], 1 237 // CHECK-NEXT: store i64 [[ADD7_2]], i64* [[OMP_IV7]]{{.*}}!llvm.access.group 238 } 239 // CHECK: [[SIMPLE_LOOP7_END]]: 240 // CHECK-NEXT: store i64 11, i64* 241 // CHECK-NEXT: 
[[A_PRIV_VAL:%.+]] = load i32, i32* [[A_PRIV]], 242 // CHECK-NEXT: store i32 [[A_PRIV_VAL]], i32* [[A]], 243 int R; 244 // CHECK: store i32 -1, i32* [[R:%[^,]+]], 245 R = -1; 246 // CHECK: store i64 0, i64* [[OMP_IV8:%[^,]+]], 247 // CHECK: store i32 1, i32* [[R_PRIV:%[^,]+]], 248 #ifdef OMP5 249 #pragma omp simd reduction(*:R) if(A) 250 #else 251 #pragma omp simd reduction(*:R) 252 #endif 253 // OMP50: [[A_VAL:%.+]] = load i32, i32* [[A]], 254 // OMP50-NEXT: [[COND:%.+]] = icmp ne i32 [[A_VAL]], 0 255 // OMP50-NEXT: br i1 [[COND]], label {{%?}}[[THEN:[^,]+]], label {{%?}}[[ELSE:[^,]+]] 256 // OMP50: [[THEN]]: 257 258 // CHECK: br label %[[SIMD_LOOP8_COND:[^,]+]] 259 // CHECK: [[SIMD_LOOP8_COND]]: 260 // CHECK-NEXT: [[IV8:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.access.group 261 // CHECK-NEXT: [[CMP8:%.+]] = icmp slt i64 [[IV8]], 7 262 // CHECK-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]] 263 for (long long i = -10; i < 10; i += 3) { 264 // CHECK: [[SIMPLE_LOOP8_BODY]]: 265 // Start of body: calculate i from IV: 266 // CHECK: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.access.group 267 // CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3 268 // CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] 269 // CHECK-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{.+}}!llvm.access.group 270 // CHECK-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]]{{.+}}!llvm.access.group 271 // CHECK: store i32 %{{.+}}, i32* [[R_PRIV]],{{.+}}!llvm.access.group 272 R *= i; 273 // CHECK: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]]{{.*}}!llvm.access.group 274 // CHECK-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1 275 // CHECK-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]]{{.*}}!llvm.access.group 276 } 277 // CHECK: [[SIMPLE_LOOP8_END]]: 278 // OMP50: br label {{%?}}[[EXIT:[^,]+]] 279 // OMP50: br label %[[SIMD_LOOP8_COND:[^,]+]] 280 // OMP50: [[SIMD_LOOP8_COND]]: 281 // OMP50-NEXT: [[IV8:%.+]] = load i64, i64* 
[[OMP_IV8]],{{[^!]*}} 282 // OMP50-NEXT: [[CMP8:%.+]] = icmp slt i64 [[IV8]], 7 283 // OMP50-NEXT: br i1 [[CMP8]], label %[[SIMPLE_LOOP8_BODY:.+]], label %[[SIMPLE_LOOP8_END:[^,]+]] 284 // OMP50: [[SIMPLE_LOOP8_BODY]]: 285 // Start of body: calculate i from IV: 286 // OMP50: [[IV8_0:%.+]] = load i64, i64* [[OMP_IV8]],{{[^!]*}} 287 // OMP50-NEXT: [[LC_IT_1:%.+]] = mul nsw i64 [[IV8_0]], 3 288 // OMP50-NEXT: [[LC_IT_2:%.+]] = add nsw i64 -10, [[LC_IT_1]] 289 // OMP50-NEXT: store i64 [[LC_IT_2]], i64* [[LC:%[^,]+]],{{[^!]*}} 290 // OMP50-NEXT: [[LC_VAL:%.+]] = load i64, i64* [[LC]],{{[^!]*}} 291 // OMP50: store i32 %{{.+}}, i32* [[R_PRIV]],{{[^!]*}} 292 // OMP50: [[IV8_2:%.+]] = load i64, i64* [[OMP_IV8]],{{[^!]*}} 293 // OMP50-NEXT: [[ADD8_2:%.+]] = add nsw i64 [[IV8_2]], 1 294 // OMP50-NEXT: store i64 [[ADD8_2]], i64* [[OMP_IV8]],{{[^!]*}} 295 // OMP50: br label {{%?}}[[SIMD_LOOP8_COND]], {{.*}}!llvm.loop ![[DISABLE_VECT:.+]] 296 // OMP50: [[SIMPLE_LOOP8_END]]: 297 // OMP50: br label {{%?}}[[EXIT]] 298 // OMP50: [[EXIT]]: 299 300 // CHECK-DAG: [[R_VAL:%.+]] = load i32, i32* [[R]], 301 // CHECK-DAG: [[R_PRIV_VAL:%.+]] = load i32, i32* [[R_PRIV]], 302 // CHECK: [[RED:%.+]] = mul nsw i32 [[R_VAL]], [[R_PRIV_VAL]] 303 // CHECK-NEXT: store i32 [[RED]], i32* [[R]], 304 // CHECK-NEXT: ret void 305 } 306 307 template <class T, unsigned K> T tfoo(T a) { return a + K; } 308 309 template <typename T, unsigned N> 310 int templ1(T a, T *z) { 311 #pragma omp simd collapse(N) 312 for (int i = 0; i < N * 2; i++) { 313 for (long long j = 0; j < (N + N + N + N); j += 2) { 314 z[i + j] = a + tfoo<T, N>(i + j); 315 } 316 } 317 return 0; 318 } 319 320 // Instantiation of templ1<float,2> 321 // CHECK-LABEL: define {{.*i32}} @{{.*}}templ1{{.*}}(float {{.+}}, float* {{.+}}) 322 // CHECK: store i64 0, i64* [[T1_OMP_IV:[^,]+]] 323 // ... 
324 // CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group 325 // CHECK-NEXT: [[CMP1:%.+]] = icmp slt i64 [[IV]], 16 326 // CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] 327 // CHECK: [[T1_BODY]]: 328 // Loop counters i and j updates: 329 // CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group 330 // CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 331 // CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 332 // CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] 333 // CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 334 // CHECK-NEXT: store i32 [[I_2]], i32* {{%.+}}{{.*}}!llvm.access.group 335 // CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group 336 // CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group 337 // CHECK-NEXT: [[J_1_DIV1:%.+]] = sdiv i64 [[IV2_1]], 4 338 // CHECK-NEXT: [[J_1_MUL1:%.+]] = mul nsw i64 [[J_1_DIV1]], 4 339 // CHECK-NEXT: [[J_1_SUB0:%.+]] = sub nsw i64 [[IV2]], [[J_1_MUL1]] 340 // CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1_SUB0]], 2 341 // CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] 342 // CHECK-NEXT: store i64 [[J_2_ADD0]], i64* {{%.+}}{{.*}}!llvm.access.group 343 // simd.for.inc: 344 // CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group 345 // CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 346 // CHECK-NEXT: store i64 [[INC]], i64* [[T1_OMP_IV]]{{.*}}!llvm.access.group 347 // CHECK-NEXT: br label {{%.+}} 348 // CHECK: [[T1_END]]: 349 // CHECK: ret i32 0 350 // 351 void inst_templ1() { 352 float a; 353 float z[100]; 354 templ1<float,2> (a, z); 355 } 356 357 358 typedef int MyIdx; 359 360 class IterDouble { 361 double *Ptr; 362 public: 363 IterDouble operator++ () const { 364 IterDouble n; 365 n.Ptr = Ptr + 1; 366 return n; 367 } 368 bool operator < (const IterDouble &that) const { 369 return Ptr < that.Ptr; 370 } 371 double & operator *() const { 372 return *Ptr; 373 } 374 
MyIdx operator - (const IterDouble &that) const { 375 return (MyIdx) (Ptr - that.Ptr); 376 } 377 IterDouble operator + (int Delta) { 378 IterDouble re; 379 re.Ptr = Ptr + Delta; 380 return re; 381 } 382 383 ///~IterDouble() {} 384 }; 385 386 // CHECK-LABEL: define {{.*void}} @{{.*}}iter_simple{{.*}} 387 void iter_simple(IterDouble ia, IterDouble ib, IterDouble ic) { 388 // 389 // Calculate number of iterations before the loop body. 390 // CHECK: [[DIFF1:%.+]] = invoke {{.*}}i32 @{{.*}}IterDouble{{.*}} 391 // CHECK: [[DIFF2:%.+]] = sub nsw i32 [[DIFF1]], 1 392 // CHECK-NEXT: [[DIFF3:%.+]] = add nsw i32 [[DIFF2]], 1 393 // CHECK-NEXT: [[DIFF4:%.+]] = sdiv i32 [[DIFF3]], 1 394 // CHECK-NEXT: [[DIFF5:%.+]] = sub nsw i32 [[DIFF4]], 1 395 // CHECK-NEXT: store i32 [[DIFF5]], i32* [[OMP_LAST_IT:%[^,]+]]{{.+}} 396 // CHECK: store i32 0, i32* [[IT_OMP_IV:%[^,]+]] 397 #pragma omp simd 398 399 // CHECK: [[IV:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}} !llvm.access.group 400 // CHECK-NEXT: [[LAST_IT:%.+]] = load i32, i32* [[OMP_LAST_IT]]{{.+}}!llvm.access.group 401 // CHECK-NEXT: [[NUM_IT:%.+]] = add nsw i32 [[LAST_IT]], 1 402 // CHECK-NEXT: [[CMP:%.+]] = icmp slt i32 [[IV]], [[NUM_IT]] 403 // CHECK-NEXT: br i1 [[CMP]], label %[[IT_BODY:[^,]+]], label %[[IT_END:[^,]+]] 404 for (IterDouble i = ia; i < ib; ++i) { 405 // CHECK: [[IT_BODY]]: 406 // Start of body: calculate i from index: 407 // CHECK: [[IV1:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.access.group 408 // Call of operator+ (i, IV). 409 // CHECK: {{%.+}} = invoke {{.+}} @{{.*}}IterDouble{{.*}} 410 // ... loop body ... 411 *i = *ic * 0.5; 412 // Float multiply and save result. 
413 // CHECK: [[MULR:%.+]] = fmul double {{%.+}}, 5.000000e-01 414 // CHECK-NEXT: invoke {{.+}} @{{.*}}IterDouble{{.*}} 415 // CHECK: store double [[MULR:%.+]], double* [[RESULT_ADDR:%.+]], !llvm.access.group 416 ++ic; 417 // 418 // CHECK: [[IV2:%.+]] = load i32, i32* [[IT_OMP_IV]]{{.+}}!llvm.access.group 419 // CHECK-NEXT: [[ADD2:%.+]] = add nsw i32 [[IV2]], 1 420 // CHECK-NEXT: store i32 [[ADD2]], i32* [[IT_OMP_IV]]{{.+}}!llvm.access.group 421 // br label %{{.*}}, !llvm.loop ![[ITER_LOOP_ID]] 422 } 423 // CHECK: [[IT_END]]: 424 // CHECK: ret void 425 } 426 427 428 // CHECK-LABEL: define {{.*void}} @{{.*}}collapsed{{.*}} 429 void collapsed(float *a, float *b, float *c, float *d) { 430 int i; // outer loop counter 431 unsigned j; // middle loop counter, leads to unsigned icmp in loop header. 432 // k declared in the loop init below 433 short l; // inner loop counter 434 // CHECK: store i32 0, i32* [[OMP_IV:[^,]+]] 435 // 436 #pragma omp simd collapse(4) 437 438 // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 439 // CHECK-NEXT: [[CMP:%.+]] = icmp ult i32 [[IV]], 120 440 // CHECK-NEXT: br i1 [[CMP]], label %[[COLL1_BODY:[^,]+]], label %[[COLL1_END:[^,]+]] 441 for (i = 1; i < 3; i++) // 2 iterations 442 for (j = 2u; j < 5u; j++) //3 iterations 443 for (int k = 3; k <= 6; k++) // 4 iterations 444 for (l = 4; l < 9; ++l) // 5 iterations 445 { 446 // CHECK: [[COLL1_BODY]]: 447 // Start of body: calculate i from index: 448 // CHECK: [[IV1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 449 // Calculation of the loop counters values. 
450 // CHECK: [[CALC_I_1:%.+]] = udiv i32 [[IV1]], 60 451 // CHECK-NEXT: [[CALC_I_1_MUL1:%.+]] = mul i32 [[CALC_I_1]], 1 452 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i32 1, [[CALC_I_1_MUL1]] 453 // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]] 454 455 // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 456 // CHECK: [[IV1_2_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 457 // CHECK-NEXT: [[CALC_J_1:%.+]] = udiv i32 [[IV1_2_1]], 60 458 // CHECK-NEXT: [[MUL_1:%.+]] = mul i32 [[CALC_J_1]], 60 459 // CHECK-NEXT: [[SUB_3:%.+]] = sub i32 [[IV1_2]], [[MUL_1]] 460 // CHECK-NEXT: [[CALC_J_2:%.+]] = udiv i32 [[SUB_3]], 20 461 // CHECK-NEXT: [[CALC_J_2_MUL1:%.+]] = mul i32 [[CALC_J_2]], 1 462 // CHECK-NEXT: [[CALC_J_3:%.+]] = add i32 2, [[CALC_J_2_MUL1]] 463 // CHECK-NEXT: store i32 [[CALC_J_3]], i32* [[LC_J:.+]] 464 465 // CHECK: [[IV1_3:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 466 // CHECK: [[IV1_3_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 467 // CHECK-NEXT: [[DIV_1:%.+]] = udiv i32 [[IV1_3_1]], 60 468 // CHECK-NEXT: [[MUL_2:%.+]] = mul i32 [[DIV_1]], 60 469 // CHECK-NEXT: [[ADD_3:%.+]] = sub i32 [[IV1_3]], [[MUL_2]] 470 471 // CHECK: [[IV1_4:%.+]] = load i32, i32* [[OMP_IV]] 472 // CHECK: [[IV1_4_1:%.+]] = load i32, i32* [[OMP_IV]] 473 // CHECK-NEXT: [[DIV_2:%.+]] = udiv i32 [[IV1_4_1]], 60 474 // CHECK-NEXT: [[MUL_3:%.+]] = mul i32 [[DIV_2]], 60 475 // CHECK-NEXT: [[SUB_6:%.+]] = sub i32 [[IV1_4]], [[MUL_3]] 476 // CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[SUB_6]], 20 477 // CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 20 478 // CHECK-NEXT: [[ADD_5:%.+]] = sub i32 [[ADD_3]], [[MUL_4]] 479 // CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[ADD_5]], 5 480 // CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 1 481 // CHECK-NEXT: [[ADD_6:%.+]] = add i32 3, [[MUL_5]] 482 // CHECK-NEXT: store i32 [[ADD_6]], i32* [[LC_K:.+]] 483 484 // CHECK: [[IV1_5:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 485 // 
CHECK: [[IV1_5_1:%.+]] = load i32, i32* [[OMP_IV]]{{.+}}!llvm.access.group 486 // CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[IV1_5_1]], 60 487 // CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 60 488 // CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_5]], [[MUL_6]] 489 490 // CHECK: [[IV1_6:%.+]] = load i32, i32* [[OMP_IV]] 491 // CHECK: [[IV1_6_1:%.+]] = load i32, i32* [[OMP_IV]] 492 // CHECK-NEXT: [[DIV_6:%.+]] = udiv i32 [[IV1_6_1]], 60 493 // CHECK-NEXT: [[MUL_7:%.+]] = mul i32 [[DIV_6]], 60 494 // CHECK-NEXT: [[SUB_10:%.+]] = sub i32 [[IV1_6]], [[MUL_7]] 495 // CHECK-NEXT: [[DIV_7:%.+]] = udiv i32 [[SUB_10]], 20 496 // CHECK-NEXT: [[MUL_8:%.+]] = mul i32 [[DIV_7]], 20 497 // CHECK-NEXT: [[SUB_11:%.+]] = sub i32 [[SUB_7]], [[MUL_8]] 498 499 // CHECK: [[IV1_7:%.+]] = load i32, i32* [[OMP_IV]] 500 // CHECK: [[IV1_7_1:%.+]] = load i32, i32* [[OMP_IV]] 501 // CHECK-NEXT: [[DIV_8:%.+]] = udiv i32 [[IV1_7_1]], 60 502 // CHECK-NEXT: [[MUL_9:%.+]] = mul i32 [[DIV_8]], 60 503 // CHECK-NEXT: [[SUB_12:%.+]] = sub i32 [[IV1_7]], [[MUL_9]] 504 505 // CHECK: [[IV1_8:%.+]] = load i32, i32* [[OMP_IV]] 506 // CHECK: [[IV1_8_1:%.+]] = load i32, i32* [[OMP_IV]] 507 // CHECK-NEXT: [[DIV_3:%.+]] = udiv i32 [[IV1_8_1]], 60 508 // CHECK-NEXT: [[MUL_4:%.+]] = mul i32 [[DIV_3]], 60 509 // CHECK-NEXT: [[SUB_7:%.+]] = sub i32 [[IV1_8]], [[MUL_4]] 510 // CHECK-NEXT: [[DIV_4:%.+]] = udiv i32 [[SUB_7]], 20 511 // CHECK-NEXT: [[MUL_5:%.+]] = mul i32 [[DIV_4]], 20 512 // CHECK-NEXT: [[SUB_8:%.+]] = sub i32 [[SUB_12]], [[MUL_5]] 513 // CHECK-NEXT: [[DIV_5:%.+]] = udiv i32 [[SUB_8]], 5 514 // CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[DIV_5]], 5 515 // CHECK-NEXT: [[SUB_9:%.+]] = sub i32 [[SUB_11]], [[MUL_6]] 516 // CHECK-NEXT: [[MUL_6:%.+]] = mul i32 [[SUB_9]], 1 517 // CHECK-NEXT: [[CALC_L_2:%.+]] = add i32 4, [[MUL_6]] 518 // CHECK-NEXT: [[CALC_L_3:%.+]] = trunc i32 [[CALC_L_2]] to i16 519 // CHECK-NEXT: store i16 [[CALC_L_3]], i16* [[LC_L:.+]] 520 // ... loop body ... 
521 // End of body: store into a[i]: 522 // CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.access.group 523 float res = b[j] * c[k]; 524 a[i] = res * d[l]; 525 // CHECK: [[IV2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group 526 // CHECK-NEXT: [[ADD2:%.+]] = add i32 [[IV2]], 1 527 // CHECK-NEXT: store i32 [[ADD2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group 528 // br label %{{[^,]+}}, !llvm.loop ![[COLL1_LOOP_ID]] 529 // CHECK: [[COLL1_END]]: 530 } 531 // i,j,l are updated; k is not updated. 532 // CHECK: store i32 3, i32* 533 // CHECK-NEXT: store i32 5, i32* 534 // CHECK-NEXT: store i32 7, i32* 535 // CHECK-NEXT: store i16 9, i16* 536 // CHECK: ret void 537 } 538 539 extern char foo(); 540 extern double globalfloat; 541 542 // CHECK-LABEL: define {{.*void}} @{{.*}}widened{{.*}} 543 void widened(float *a, float *b, float *c, float *d) { 544 int i; // outer loop counter 545 short j; // inner loop counter 546 globalfloat = 1.0; 547 int localint = 1; 548 // CHECK: store double {{.+}}, double* [[GLOBALFLOAT:@.+]] 549 // Counter is widened to 64 bits. 550 // CHECK: store i64 0, i64* [[OMP_IV:[^,]+]] 551 // 552 #pragma omp simd collapse(2) private(globalfloat, localint) 553 554 // CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.access.group 555 // CHECK-NEXT: [[LI:%.+]] = load i64, i64* [[OMP_LI:%[^,]+]]{{.+}}!llvm.access.group 556 // CHECK-NEXT: [[NUMIT:%.+]] = add nsw i64 [[LI]], 1 557 // CHECK-NEXT: [[CMP:%.+]] = icmp slt i64 [[IV]], [[NUMIT]] 558 // CHECK-NEXT: br i1 [[CMP]], label %[[WIDE1_BODY:[^,]+]], label %[[WIDE1_END:[^,]+]] 559 for (i = 1; i < 3; i++) // 2 iterations 560 for (j = 0; j < foo(); j++) // foo() iterations 561 { 562 // CHECK: [[WIDE1_BODY]]: 563 // Start of body: calculate i from index: 564 // CHECK: [[IV1:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.access.group 565 // Calculation of the loop counters values... 
566 // CHECK: store i32 {{[^,]+}}, i32* [[LC_I:.+]] 567 // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.+}}!llvm.access.group 568 // CHECK: store i16 {{[^,]+}}, i16* [[LC_J:.+]] 569 // ... loop body ... 570 // 571 // Here we expect store into private double var, not global 572 // CHECK-NOT: store double {{.+}}, double* [[GLOBALFLOAT]] 573 globalfloat = (float)j/i; 574 float res = b[j] * c[j]; 575 // Store into a[i]: 576 // CHECK: store float [[RESULT:%.+]], float* [[RESULT_ADDR:%.+]]{{.+}}!llvm.access.group 577 a[i] = res * d[i]; 578 // Then there's a store into private var localint: 579 // CHECK: store i32 {{.+}}, i32* [[LOCALINT:%[^,]+]]{{.+}}!llvm.access.group 580 localint = (int)j; 581 // CHECK: [[IV2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}!llvm.access.group 582 // CHECK-NEXT: [[ADD2:%.+]] = add nsw i64 [[IV2]], 1 583 // CHECK-NEXT: store i64 [[ADD2]], i64* [[OMP_IV]]{{.*}}!llvm.access.group 584 // 585 // br label %{{[^,]+}}, !llvm.loop ![[WIDE1_LOOP_ID]] 586 // CHECK: [[WIDE1_END]]: 587 } 588 // i,j are updated. 589 // CHECK: store i32 3, i32* [[I:%[^,]+]] 590 // CHECK: store i16 591 // 592 // Here we expect store into original localint, not its privatized version. 
// CHECK-NOT: store i32 {{.+}}, i32* [[LOCALINT]]
  localint = (int)j;
// CHECK: ret void
}

// linear: three back-to-back `omp simd` loops over the same reference
// variable `k` (bound to the local `val`), one per clause spelling:
// `linear(k : 3)`, `linear(val(k) : 3)` and `linear(uval(k) : 3)`.
// Every loop runs i = 10 down to 2, i.e. 9 iterations; that is where the
// bound 9 in the compare and the final increment 27 (= 9 * 3) in the
// directives come from.
// The `// br label ...` lines inside the loops carry no FileCheck prefix, so
// they are plain comments and are not verified.
// CHECK-LABEL: define {{.*void}} @{{.*}}linear{{.*}}(float* {{.+}})
void linear(float *a) {
// CHECK: [[VAL_ADDR:%.+]] = alloca i64,
// CHECK: [[K_ADDR:%.+]] = alloca i64*,
  long long val = 0;
  long long &k = val;

  // Variant 1: plain linear clause on the reference. The directives reach the
  // value by loading the pointer stored in the reference slot.
  #pragma omp simd linear(k : 3)
// CHECK: store i64* [[VAL_ADDR]], i64** [[K_ADDR]],
// CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]

// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
  for (int i = 10; i > 1; i--) {
// CHECK: [[SIMPLE_LOOP_BODY]]:
// Start of body: calculate i from IV:
// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// FIXME: It is interesting, why the following "mul 1" was not constant folded?
// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
//
// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
// Update of the privatized version of linear variable!
// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
    a[k]++;
    k = k + 3;
// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
  }
// CHECK: [[SIMPLE_LOOP_END]]:
//
// Update linear vars after loop, as the loop was operating on a private version.
// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
// CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
//

  // Variant 2: `val` modifier. The directives mirror variant 1, again going
  // through the pointer held in the reference slot.
  #pragma omp simd linear(val(k) : 3)
// CHECK: [[VAL_REF:%.+]] = load i64*, i64** [[K_ADDR]],
// CHECK: store i64* [[VAL_REF]], i64** [[K_ADDR_REF:%.+]],
// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_REF]]
// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]

// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
  for (int i = 10; i > 1; i--) {
// CHECK: [[SIMPLE_LOOP_BODY]]:
// Start of body: calculate i from IV:
// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// FIXME: It is interesting, why the following "mul 1" was not constant folded?
// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
//
// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
// Update of the privatized version of linear variable!
// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
    a[k]++;
    k = k + 3;
// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
  }
// CHECK: [[SIMPLE_LOOP_END]]:
//
// Update linear vars after loop, as the loop was operating on a private version.
// CHECK: [[K_REF:%.+]] = load i64*, i64** [[K_ADDR_REF]],
// CHECK: store i64* [[K_REF]], i64** [[K_PRIV_REF:%.+]],
// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
// CHECK-NEXT: [[K_REF:%.+]] = load i64*, i64** [[K_PRIV_REF]],
// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[K_REF]]
//
  // Variant 3: `uval` modifier. Here the directives read the start value from,
  // and write the final value to, [[VAL_ADDR]] directly, with no reference
  // slot load in between.
  #pragma omp simd linear(uval(k) : 3)
// CHECK: store i32 0, i32* [[OMP_IV:%[^,]+]]
// CHECK: [[K0LOAD:%.+]] = load i64, i64* [[VAL_ADDR]]
// CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]]

// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[CMP2:%.+]] = icmp slt i32 [[IV]], 9
// CHECK-NEXT: br i1 [[CMP2]], label %[[SIMPLE_LOOP_BODY:.+]], label %[[SIMPLE_LOOP_END:[^,]+]]
  for (int i = 10; i > 1; i--) {
// CHECK: [[SIMPLE_LOOP_BODY]]:
// Start of body: calculate i from IV:
// CHECK: [[IV_0:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// FIXME: It is interesting, why the following "mul 1" was not constant folded?
// CHECK-NEXT: [[IV_1:%.+]] = mul nsw i32 [[IV_0]], 1
// CHECK-NEXT: [[LC_I_1:%.+]] = sub nsw i32 10, [[IV_1]]
// CHECK-NEXT: store i32 [[LC_I_1]], i32* {{.+}}, !llvm.access.group
//
// CHECK-NEXT: [[LIN0_1:%.+]] = load i64, i64* [[LIN0]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[LIN_MUL1:%.+]] = mul nsw i32 [[IV_2]], 3
// CHECK-NEXT: [[LIN_EXT1:%.+]] = sext i32 [[LIN_MUL1]] to i64
// CHECK-NEXT: [[LIN_ADD1:%.+]] = add nsw i64 [[LIN0_1]], [[LIN_EXT1]]
// Update of the privatized version of linear variable!
// CHECK-NEXT: store i64 [[LIN_ADD1]], i64* [[K_PRIVATIZED:%[^,]+]]
    a[k]++;
    k = k + 3;
// CHECK: [[IV_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}!llvm.access.group
// CHECK-NEXT: [[ADD2_2:%.+]] = add nsw i32 [[IV_2]], 1
// CHECK-NEXT: store i32 [[ADD2_2]], i32* [[OMP_IV]]{{.*}}!llvm.access.group
// br label {{.+}}, !llvm.loop ![[SIMPLE_LOOP_ID]]
  }
// CHECK: [[SIMPLE_LOOP_END]]:
//
// Update linear vars after loop, as the loop was operating on a private version.
// CHECK: [[LIN0_2:%.+]] = load i64, i64* [[LIN0]]
// CHECK-NEXT: [[LIN_ADD2:%.+]] = add nsw i64 [[LIN0_2]], 27
// CHECK-NEXT: store i64 [[LIN_ADD2]], i64* [[VAL_ADDR]]
//
}

#ifdef OMP5
// inner_simd: nested simd loops. The outer loop lists `a` as nontemporal, the
// inner loop lists `b`. The OMP50 directives require both the load and the
// store of the inner `a = b` to carry !nontemporal, while for the outer
// `a = b` only the store does (its load must not).
// OMP50-LABEL: inner_simd
void inner_simd() {
  double a, b;
  #pragma omp simd nontemporal(a)
  for (int i = 0; i < 10; ++i) {
    #pragma omp simd nontemporal(b)
    for (int k = 0; k < 10; ++k) {
// OMP50: load double,{{.*}}!nontemporal
// OMP50: store double{{.*}}!nontemporal
      a = b;
    }
// OMP50-NOT: load double,{{.*}}!nontemporal
// OMP50: load double,
// OMP50: store double{{.*}}!nontemporal
    a = b;
  }
}

// T::foo: the nontemporal clause names the inherited member as `Base::a`.
// The directives require the store through `Base::a` to be !nontemporal and
// the store that follows it (through the global object, `t.a`) to carry no
// such marking.
extern struct T t;
struct Base {
  float a;
};
struct T : public Base {
  void foo() {
    #pragma omp simd nontemporal(Base::a)
    for (int i = 0; i < 10; ++i) {
// OMP50: store float{{.*}}!nontemporal
// OMP50-NOT: nontemporal
// OMP50-NEXT: store float
      Base::a = 0;
      t.a = 0;
    }
  }
} t;

// Calls T::foo on the global `t`; presumably this is what forces the inline
// member function to be emitted (confirm).
void bartfoo() {
  t.foo();
}

#endif // OMP5
// bar / parallel_simd: checked through the TERM_DEBUG prefix. The directives
// require the bar() call inside the simd region to be an `invoke` whose unwind
// edge reaches a landing pad that calls __clang_call_terminate, with no
// __kmpc_global_thread_num call before or after the invoke.
// NOTE: the [[@LINE-11]] directive after parallel_simd is line-relative and
// currently resolves to the `#pragma omp simd` line. Do not add or remove
// lines between that pragma and the directive.
// TERM_DEBUG-LABEL: bar
int bar() {return 0;};

// TERM_DEBUG-LABEL: parallel_simd
void parallel_simd(float *a) {
#pragma omp parallel
#pragma omp simd
  // TERM_DEBUG-NOT: __kmpc_global_thread_num
  // TERM_DEBUG:     invoke i32 {{.*}}bar{{.*}}()
  // TERM_DEBUG:     unwind label %[[TERM_LPAD:[^,]+]],
  // TERM_DEBUG-NOT: __kmpc_global_thread_num
  // TERM_DEBUG:     [[TERM_LPAD]]
  // TERM_DEBUG:     call void @__clang_call_terminate
  // TERM_DEBUG:     unreachable
  for (unsigned i = 131071; i <= 2147483647; i += 127)
    a[i] += bar();
}
// TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]],

// SS / S7 / S8: `aligned` clauses applied to data members, spelled as `a`,
// `this->b` and `S7<SS>::a`. The DAG directives below look for four
// pointer-to-integer conversions plus four mask/compare/llvm.assume groups.
// Masks 7 and 3 match the explicit alignments 8 and 4; mask 15 presumably
// reflects the default alignment used when the clause gives none (TODO
// confirm against the target).
// CHECK-LABEL: S8
// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64
// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64

// CHECK-DAG: and i64 %{{.+}}, 15
// CHECK-DAG: icmp eq i64 %{{.+}}, 0
// CHECK-DAG: call void @llvm.assume(i1

// CHECK-DAG: and i64 %{{.+}}, 7
// CHECK-DAG: icmp eq i64 %{{.+}}, 0
// CHECK-DAG: call void @llvm.assume(i1

// CHECK-DAG: and i64 %{{.+}}, 15
// CHECK-DAG: icmp eq i64 %{{.+}}, 0
// CHECK-DAG: call void @llvm.assume(i1

// CHECK-DAG: and i64 %{{.+}}, 3
// CHECK-DAG: icmp eq i64 %{{.+}}, 0
// CHECK-DAG: call void @llvm.assume(i1
struct SS {
  SS(): a(0) {}
  SS(int v) : a(v) {}
  int a;
  typedef int type;
};

// S7<T> derives from T and additionally holds a pointer `a` and an array `b`
// of T. Its public constructor runs two simd loops with aligned clauses on
// those members.
template <typename T>
class S7 : public T {
protected:
  T *a;
  T b[2];
  S7() : a(0) {}

public:
  S7(typename T::type &v) : a((T*)&v) {
    #pragma omp simd aligned(a)
    for (int k = 0; k < a->a; ++k)
      ++this->a->a;
    #pragma omp simd aligned(this->b : 8)
    for (int k = 0; k < a->a; ++k)
      ++a->a;
  }
};

// S8 repeats the two loops inside `omp parallel` regions (private(a) and
// shared(b)). IterDouble is not declared in this part of the file; presumably
// it is defined earlier (confirm).
class S8 : private IterDouble, public S7<SS> {
  S8() {}

public:
  S8(int v) : S7<SS>(v){
    #pragma omp parallel private(a)
    #pragma omp simd aligned(S7<SS>::a)
    for (int k = 0; k < a->a; ++k)
      ++this->a->a;
    #pragma omp parallel shared(b)
    #pragma omp simd aligned(this->b: 4)
    for (int k = 0; k < a->a; ++k)
      ++a->a;
  }
};
S8 s8(0);

// Metadata checks: the TERM_DEBUG run must see no `line: 0` location between
// its previous match and the global-initializer subprogram. The OMP50 run
// ties DISABLE_VECT (not captured in this part of the file; presumably
// captured earlier) to the node that sets llvm.loop.vectorize.enable to false.
// TERM_DEBUG-NOT: line: 0,
// TERM_DEBUG: distinct !DISubprogram(linkageName: "_GLOBAL__sub_I_simd_codegen.cpp",
// OMP50-DAG: ![[NOVECT:.+]] = !{!"llvm.loop.vectorize.enable", i1 false}
// OMP50-DAG: ![[DISABLE_VECT]] = distinct !{{.*}}![[NOVECT]]{{[,}]}}
#endif // HEADER
