1 // Check code generation 2 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR 3 4 // Check same results after serialization round-trip 5 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s 6 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR 7 // expected-no-diagnostics 8 9 #ifndef HEADER 10 #define HEADER 11 12 // placeholder for loop body code. 13 extern "C" void body(...) {} 14 15 // IR: define {{.*}}void @_ZN1SC2Ev(%struct.S* 16 // IR: [[THIS_ADDR:%.+]] = alloca %struct.S*, align 8 17 // IR-NEXT: [[I_REF:%.+]] = alloca i32*, align 8 18 // IR-NEXT: [[FLOOR:%.+]] = alloca i32, align 4 19 // IR-NEXT: [[TILE:%.+]] = alloca i32, align 4 20 // IR-NEXT: store %struct.S* %{{.+}}, %struct.S** [[THIS_ADDR]], align 8 21 // IR-NEXT: [[THIS:%.+]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 22 // IR-NEXT: [[I:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[THIS]], i32 0, i32 0 23 // IR-NEXT: store i32* [[I]], i32** [[I_REF]], align 8 24 // IR-NEXT: store i32 0, i32* [[FLOOR]], align 4 25 // IR-NEXT: br label %[[FOR_COND:.+]] 26 // IR: [[FOR_COND]]: 27 // IR-NEXT: [[TMP0:%.+]] = load i32, i32* [[FLOOR]], align 4 28 // IR-NEXT: [[CMP:%.+]] = icmp slt i32 [[TMP0]], 4 29 // IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END11:.+]] 30 // IR: [[FOR_BODY]]: 31 // IR-NEXT: [[TMP1:%.+]] = load i32, i32* [[FLOOR]], align 4 32 // IR-NEXT: store i32 [[TMP1]], i32* [[TILE]], align 4 33 // IR-NEXT: br label %[[FOR_COND3:.+]] 34 // IR: [[FOR_COND3]]: 35 // IR-NEXT: [[TMP2:%.+]] = load i32, i32* [[TILE]], align 4 36 // IR-NEXT: [[TMP3:%.+]] = load i32, i32* [[FLOOR]], align 4 37 // IR-NEXT: [[ADD:%.+]] = add nsw i32 [[TMP3]], 5 38 // IR-NEXT: [[CMP4:%.+]] = icmp slt i32 4, [[ADD]] 39 // IR-NEXT: br i1 [[CMP4]], label 
%[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] 40 // IR: [[COND_TRUE]]: 41 // IR-NEXT: br label %[[COND_END:.+]] 42 // IR: [[COND_FALSE]]: 43 // IR-NEXT: [[TMP4:%.+]] = load i32, i32* [[FLOOR]], align 4 44 // IR-NEXT: [[ADD5:%.+]] = add nsw i32 [[TMP4]], 5 45 // IR-NEXT: br label %[[COND_END]] 46 // IR: [[COND_END]]: 47 // IR-NEXT: [[COND:%.+]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD5]], %[[COND_FALSE]] ] 48 // IR-NEXT: [[CMP6:%.+]] = icmp slt i32 [[TMP2]], [[COND]] 49 // IR-NEXT: br i1 [[CMP6]], label %[[FOR_BODY7:.+]], label %[[FOR_END:.+]] 50 // IR: [[FOR_BODY7]]: 51 // IR-NEXT: [[TMP5:%.+]] = load i32, i32* [[TILE]], align 4 52 // IR-NEXT: [[MUL:%.+]] = mul nsw i32 [[TMP5]], 3 53 // IR-NEXT: [[ADD8:%.+]] = add nsw i32 7, [[MUL]] 54 // IR-NEXT: [[TMP6:%.+]] = load i32*, i32** [[I_REF]], align 8 55 // IR-NEXT: store i32 [[ADD8]], i32* [[TMP6]], align 4 56 // IR-NEXT: [[TMP7:%.+]] = load i32*, i32** [[I_REF]], align 8 57 // IR-NEXT: [[TMP8:%.+]] = load i32, i32* [[TMP7]], align 4 58 // IR-NEXT: call void (...) 
@body(i32 [[TMP8]]) 59 // IR-NEXT: br label %[[FOR_INC:.+]] 60 // IR: [[FOR_INC]]: 61 // IR-NEXT: [[TMP9:%.+]] = load i32, i32* [[TILE]], align 4 62 // IR-NEXT: [[INC:%.+]] = add nsw i32 [[TMP9]], 1 63 // IR-NEXT: store i32 [[INC]], i32* [[TILE]], align 4 64 // IR-NEXT: br label %[[FOR_COND3]] 65 // IR: [[FOR_END]]: 66 // IR-NEXT: br label %[[FOR_INC9:.+]] 67 // IR: [[FOR_INC9]]: 68 // IR-NEXT: [[TMP10:%.+]] = load i32, i32* [[FLOOR]], align 4 69 // IR-NEXT: [[ADD10:%.+]] = add nsw i32 [[TMP10]], 5 70 // IR-NEXT: store i32 [[ADD10]], i32* [[FLOOR]], align 4 71 // IR-NEXT: br label %[[FOR_COND]] 72 // IR: [[FOR_END11]]: 73 // IR-NEXT: ret void 74 75 struct S { 76 int i; 77 S() { 78 #pragma omp tile sizes(5) 79 for (i = 7; i < 17; i += 3) 80 body(i); 81 } 82 } s; 83 84 // IR-LABEL: define {{.*}}void @foo1( 85 // IR: [[START_ADDR:%.*]] = alloca i32, align 4 86 // IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 87 // IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 88 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 89 // IR-NEXT: [[CAP_EXPR:%.+]] = alloca i32, align 4 90 // IR-NEXT: [[CAP_EXPR1:%.+]] = alloca i32, align 4 91 // IR-NEXT: [[CAP_EXPR2:%.+]] = alloca i32, align 4 92 // IR-NEXT: [[CAP_EXPR3:%.+]] = alloca i32, align 4 93 // IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 94 // IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 95 // IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 96 // IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 97 // IR-NEXT: store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4 98 // IR-NEXT: [[TMP0:%.+]] = load i32, i32* [[START_ADDR]], align 4 99 // IR-NEXT: store i32 [[TMP0]], i32* [[CAP_EXPR]], align 4 100 // IR-NEXT: [[TMP1:%.+]] = load i32, i32* [[END_ADDR]], align 4 101 // IR-NEXT: store i32 [[TMP1]], i32* [[CAP_EXPR1]], align 4 102 // IR-NEXT: [[TMP2:%.+]] = load i32, i32* [[STEP_ADDR]], align 4 103 // IR-NEXT: store i32 [[TMP2]], i32* [[CAP_EXPR2]], align 4 104 // IR-NEXT: [[TMP3:%.+]] = load 
i32, i32* [[CAP_EXPR1]], align 4 105 // IR-NEXT: [[TMP4:%.+]] = load i32, i32* [[CAP_EXPR]], align 4 106 // IR-NEXT: [[SUB:%.+]] = sub i32 [[TMP3]], [[TMP4]] 107 // IR-NEXT: [[SUB4:%.+]] = sub i32 [[SUB]], 1 108 // IR-NEXT: [[TMP5:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4 109 // IR-NEXT: [[ADD:%.+]] = add i32 [[SUB4]], [[TMP5]] 110 // IR-NEXT: [[TMP6:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4 111 // IR-NEXT: [[DIV:%.+]] = udiv i32 [[ADD]], [[TMP6]] 112 // IR-NEXT: [[SUB5:%.+]] = sub i32 [[DIV]], 1 113 // IR-NEXT: store i32 [[SUB5]], i32* [[CAP_EXPR3]], align 4 114 // IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 115 // IR-NEXT: br label %[[FOR_COND:.*]] 116 // IR: [[FOR_COND]]: 117 // IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 118 // IR-NEXT: [[TMP8:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4 119 // IR-NEXT: [[ADD3:%.*]] = add i32 [[TMP8]], 1 120 // IR-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]] 121 // IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END25:.*]] 122 // IR: [[FOR_BODY]]: 123 // IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 124 // IR-NEXT: store i32 [[TMP5]], i32* [[DOTTILE_0_IV_I]], align 4 125 // IR-NEXT: br label %[[FOR_COND4:.*]] 126 // IR: [[FOR_COND4]]: 127 // IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 128 // IR-NEXT: [[TMP11:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4 129 // IR-NEXT: [[ADD10:%.*]] = add i32 [[TMP11]], 1 130 // IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 131 // IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP11]], 5 132 // IR-NEXT: [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]] 133 // IR-NEXT: br i1 [[CMP12]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] 134 // IR: [[COND_TRUE]]: 135 // IR-NEXT: [[TMP13:%.+]] = load i32, i32* [[CAP_EXPR3]], align 4 136 // IR-NEXT: [[ADD18:%.*]] = add i32 [[TMP13]], 1 137 // IR-NEXT: br label %[[COND_END:.*]] 138 // IR: [[COND_FALSE]]: 139 // IR-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
// IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP16]], 5
// IR-NEXT: br label %[[COND_END]]
// IR: [[COND_END]]:
// IR-NEXT: [[COND:%.*]] = phi i32 [ [[ADD18]], %[[COND_TRUE]] ], [ [[ADD19]], %[[COND_FALSE]] ]
// IR-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TMP6]], [[COND]]
// IR-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END:.*]]
// IR: [[FOR_BODY21]]:
// IR-NEXT: [[TMP15:%.+]] = load i32, i32* [[CAP_EXPR]], align 4
// IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
// IR-NEXT: [[TMP17:%.+]] = load i32, i32* [[CAP_EXPR2]], align 4
// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], [[TMP17]]
// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP15]], [[MUL]]
// IR-NEXT: store i32 [[ADD22]], i32* [[I]], align 4
// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4
// IR-NEXT: call void (...) @body(i32 [[TMP21]])
// IR-NEXT: br label %[[FOR_INC:.*]]
// IR: [[FOR_INC]]:
// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1
// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4
// IR-NEXT: br label %[[FOR_COND4]]
// IR: [[FOR_END]]:
// IR-NEXT: br label %[[FOR_INC23:.*]]
// IR: [[FOR_INC23]]:
// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
// IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 5
// IR-NEXT: store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4
// IR-NEXT: br label %[[FOR_COND]]
// IR: [[FOR_END25]]:
// IR-NEXT: ret void
//
// Tiled loop (tile size 5) whose lower bound, upper bound and stride are the
// runtime arguments `start`, `end` and `step`. The iteration variable `i` is
// declared before the loop instead of in the for-init statement.
extern "C" void foo1(int start, int end, int step) {
  int i;
#pragma omp tile sizes(5)
  for (i = start; i < end; i += step)
    body(i);
}

// IR-LABEL: define {{.*}}void @foo2(
// IR-NEXT: entry:
// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4
// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4
//
IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 183 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 184 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 185 // IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 186 // IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 187 // IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 188 // IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 189 // IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 190 // IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 191 // IR-NEXT: store i32 [[STEP:%.*]], i32* [[STEP_ADDR]], align 4 192 // IR-NEXT: store i32 7, i32* [[I]], align 4 193 // IR-NEXT: store i32 7, i32* [[J]], align 4 194 // IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 195 // IR-NEXT: br label %[[FOR_COND:.*]] 196 // IR: [[FOR_COND]]: 197 // IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 198 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4 199 // IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END30:.*]] 200 // IR: [[FOR_BODY]]: 201 // IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 202 // IR-NEXT: br label %[[FOR_COND1:.*]] 203 // IR: [[FOR_COND1]]: 204 // IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 205 // IR-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP1]], 4 206 // IR-NEXT: br i1 [[CMP2]], label %[[FOR_BODY3:.*]], label %[[FOR_END27:.*]] 207 // IR: [[FOR_BODY3]]: 208 // IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 209 // IR-NEXT: store i32 [[TMP2]], i32* [[DOTTILE_0_IV_I]], align 4 210 // IR-NEXT: br label %[[FOR_COND4:.*]] 211 // IR: [[FOR_COND4]]: 212 // IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 213 // IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 214 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 5 215 // IR-NEXT: [[CMP5:%.*]] = icmp slt i32 4, [[ADD]] 216 // IR-NEXT: br i1 [[CMP5]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] 217 // IR: 
[[COND_TRUE]]: 218 // IR-NEXT: br label %[[COND_END:.*]] 219 // IR: [[COND_FALSE]]: 220 // IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 221 // IR-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 5 222 // IR-NEXT: br label %[[COND_END]] 223 // IR: [[COND_END]]: 224 // IR-NEXT: [[COND:%.*]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD6]], %[[COND_FALSE]] ] 225 // IR-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP3]], [[COND]] 226 // IR-NEXT: br i1 [[CMP7]], label %[[FOR_BODY8:.*]], label %[[FOR_END24:.*]] 227 // IR: [[FOR_BODY8]]: 228 // IR-NEXT: [[TMP6:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 229 // IR-NEXT: [[MUL:%.+]] = mul nsw i32 [[TMP6]], 3 230 // IR-NEXT: [[ADD9:%.+]] = add nsw i32 7, [[MUL]] 231 // IR-NEXT: store i32 [[ADD9]], i32* [[I]], align 4 232 // IR-NEXT: [[TMP7:%.+]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 233 // IR-NEXT: store i32 [[TMP7]], i32* [[DOTTILE_1_IV_J]], align 4 234 // IR-NEXT: br label %[[FOR_COND10:.+]] 235 // IR: [[FOR_COND10]]: 236 // IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 237 // IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 238 // IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP8]], 5 239 // IR-NEXT: [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]] 240 // IR-NEXT: br i1 [[CMP11]], label %[[COND_TRUE12:.*]], label %[[COND_FALSE13:.*]] 241 // IR: [[COND_TRUE12]]: 242 // IR-NEXT: br label %[[COND_END15:.*]] 243 // IR: [[COND_FALSE13]]: 244 // IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 245 // IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 5 246 // IR-NEXT: br label %[[COND_END15]] 247 // IR: [[COND_END15]]: 248 // IR-NEXT: [[COND16:%.*]] = phi i32 [ 4, %[[COND_TRUE12]] ], [ [[ADD14]], %[[COND_FALSE13]] ] 249 // IR-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP7]], [[COND16]] 250 // IR-NEXT: br i1 [[CMP17]], label %[[FOR_BODY18:.*]], label %[[FOR_END:.*]] 251 // IR: [[FOR_BODY18]]: 252 // IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 253 // 
IR-NEXT: [[MUL20:%.*]] = mul nsw i32 [[TMP11]], 3
// IR-NEXT: [[ADD21:%.*]] = add nsw i32 7, [[MUL20]]
// IR-NEXT: store i32 [[ADD21]], i32* [[J]], align 4
// IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4
// IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[J]], align 4
// IR-NEXT: call void (...) @body(i32 [[TMP12]], i32 [[TMP13]])
// IR-NEXT: br label %[[FOR_INC:.*]]
// IR: [[FOR_INC]]:
// IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1
// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4
// IR-NEXT: br label %[[FOR_COND10]]
// IR: [[FOR_END]]:
// IR-NEXT: br label %[[FOR_INC22:.*]]
// IR: [[FOR_INC22]]:
// IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
// IR-NEXT: [[INC23:%.*]] = add nsw i32 [[TMP15]], 1
// IR-NEXT: store i32 [[INC23]], i32* [[DOTTILE_0_IV_I]], align 4
// IR-NEXT: br label %[[FOR_COND4]]
// IR: [[FOR_END24]]:
// IR-NEXT: br label %[[FOR_INC25:.*]]
// IR: [[FOR_INC25]]:
// IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
// IR-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP16]], 5
// IR-NEXT: store i32 [[ADD26]], i32* [[DOTFLOOR_1_IV_J]], align 4
// IR-NEXT: br label %[[FOR_COND1]]
// IR: [[FOR_END27]]:
// IR-NEXT: br label %[[FOR_INC28:.*]]
// IR: [[FOR_INC28]]:
// IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4
// IR-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP17]], 5
// IR-NEXT: store i32 [[ADD29]], i32* [[DOTFLOOR_0_IV_I]], align 4
// IR-NEXT: br label %[[FOR_COND]]
// IR: [[FOR_END30]]:
// IR-NEXT: ret void
//
// Two-deep loop nest with constant bounds, tiled in both dimensions with
// sizes(5,5). The `start`, `end` and `step` parameters are not referenced by
// the loops.
extern "C" void foo2(int start, int end, int step) {
#pragma omp tile sizes(5,5)
  for (int i = 7; i < 17; i+=3)
    for (int j = 7; j < 17; j+=3)
      body(i,j);
}

// IR-LABEL: @foo3(
// IR-NEXT: entry:
// IR-NEXT: [[DOTOMP_IV:%.*]] =
alloca i32, align 4 299 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 300 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 301 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 302 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 303 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 304 // IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 305 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 306 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 307 // IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 308 // IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 309 // IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 310 // IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2:@.*]]) 311 // IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 312 // IR-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 313 // IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 314 // IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 315 // IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1:@.*]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 316 // IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 317 // IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 0 318 // IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] 319 // IR: [[COND_TRUE]]: 320 // IR-NEXT: br label %[[COND_END:.*]] 321 // IR: [[COND_FALSE]]: 322 // IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 323 // IR-NEXT: br label %[[COND_END]] 324 // IR: [[COND_END]]: 325 // IR-NEXT: [[COND:%.*]] = phi i32 [ 0, %[[COND_TRUE]] ], [ [[TMP2]], %[[COND_FALSE]] ] 326 // IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 327 // IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 328 // IR-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 329 // IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] 330 // IR: [[OMP_INNER_FOR_COND]]: 
331 // IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 332 // IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 333 // IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] 334 // IR-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] 335 // IR: [[OMP_INNER_FOR_BODY]]: 336 // IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 337 // IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 5 338 // IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 339 // IR-NEXT: store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I]], align 4 340 // IR-NEXT: store i32 7, i32* [[I]], align 4 341 // IR-NEXT: store i32 7, i32* [[J]], align 4 342 // IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 343 // IR-NEXT: br label %[[FOR_COND:.*]] 344 // IR: [[FOR_COND]]: 345 // IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 346 // IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP7]], 4 347 // IR-NEXT: br i1 [[CMP3]], label %[[FOR_BODY:.*]], label %[[FOR_END33:.*]] 348 // IR: [[FOR_BODY]]: 349 // IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 350 // IR-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 351 // IR-NEXT: br label %[[FOR_COND4:.*]] 352 // IR: [[FOR_COND4]]: 353 // IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 354 // IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 355 // IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 5 356 // IR-NEXT: [[CMP6:%.*]] = icmp slt i32 4, [[ADD5]] 357 // IR-NEXT: br i1 [[CMP6]], label %[[COND_TRUE7:.*]], label %[[COND_FALSE8:.*]] 358 // IR: [[COND_TRUE7]]: 359 // IR-NEXT: br label %[[COND_END10:.*]] 360 // IR: [[COND_FALSE8]]: 361 // IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 362 // IR-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], 5 363 // IR-NEXT: br label %[[COND_END10]] 364 // IR: [[COND_END10]]: 365 // IR-NEXT: [[COND11:%.*]] = phi i32 [ 4, %[[COND_TRUE7]] ], [ [[ADD9]], %[[COND_FALSE8]] ] 
366 // IR-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP9]], [[COND11]] 367 // IR-NEXT: br i1 [[CMP12]], label %[[FOR_BODY13:.*]], label %[[FOR_END30:.*]] 368 // IR: [[FOR_BODY13]]: 369 // IR-NEXT: [[TMP12:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 370 // IR-NEXT: [[MUL13:%.+]] = mul nsw i32 [[TMP12]], 3 371 // IR-NEXT: [[ADD14:%.+]] = add nsw i32 7, [[MUL13]] 372 // IR-NEXT: store i32 [[ADD14]], i32* [[I]], align 4 373 // IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 374 // IR-NEXT: store i32 [[TMP12]], i32* [[DOTTILE_1_IV_J]], align 4 375 // IR-NEXT: br label %[[FOR_COND14:.*]] 376 // IR: [[FOR_COND14]]: 377 // IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 378 // IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 379 // IR-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP14]], 5 380 // IR-NEXT: [[CMP16:%.*]] = icmp slt i32 4, [[ADD15]] 381 // IR-NEXT: br i1 [[CMP16]], label %[[COND_TRUE17:.*]], label %[[COND_FALSE18:.*]] 382 // IR: [[COND_TRUE17]]: 383 // IR-NEXT: br label %[[COND_END20:.*]] 384 // IR: [[COND_FALSE18]]: 385 // IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 386 // IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP15]], 5 387 // IR-NEXT: br label %[[COND_END20]] 388 // IR: [[COND_END20]]: 389 // IR-NEXT: [[COND21:%.*]] = phi i32 [ 4, %[[COND_TRUE17]] ], [ [[ADD19]], %[[COND_FALSE18]] ] 390 // IR-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP13]], [[COND21]] 391 // IR-NEXT: br i1 [[CMP22]], label %[[FOR_BODY23:.*]], label %[[FOR_END:.*]] 392 // IR: [[FOR_BODY23]]: 393 // IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 394 // IR-NEXT: [[MUL26:%.*]] = mul nsw i32 [[TMP17]], 3 395 // IR-NEXT: [[ADD27:%.*]] = add nsw i32 7, [[MUL26]] 396 // IR-NEXT: store i32 [[ADD27]], i32* [[J]], align 4 397 // IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 398 // IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[J]], align 4 399 // IR-NEXT: call void (...) 
@body(i32 [[TMP18]], i32 [[TMP19]])
// IR-NEXT: br label %[[FOR_INC:.*]]
// IR: [[FOR_INC]]:
// IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4
// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1
// IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4
// IR-NEXT: br label %[[FOR_COND14]]
// IR: [[FOR_END]]:
// IR-NEXT: br label %[[FOR_INC28:.*]]
// IR: [[FOR_INC28]]:
// IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4
// IR-NEXT: [[INC29:%.*]] = add nsw i32 [[TMP21]], 1
// IR-NEXT: store i32 [[INC29]], i32* [[DOTTILE_0_IV_I]], align 4
// IR-NEXT: br label %[[FOR_COND4]]
// IR: [[FOR_END30]]:
// IR-NEXT: br label %[[FOR_INC31:.*]]
// IR: [[FOR_INC31]]:
// IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4
// IR-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP22]], 5
// IR-NEXT: store i32 [[ADD32]], i32* [[DOTFLOOR_1_IV_J]], align 4
// IR-NEXT: br label %[[FOR_COND]]
// IR: [[FOR_END33]]:
// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]]
// IR: [[OMP_BODY_CONTINUE]]:
// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]]
// IR: [[OMP_INNER_FOR_INC]]:
// IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// IR-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP23]], 1
// IR-NEXT: store i32 [[ADD34]], i32* [[DOTOMP_IV]], align 4
// IR-NEXT: br label %[[OMP_INNER_FOR_COND]]
// IR: [[OMP_INNER_FOR_END]]:
// IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
// IR: [[OMP_LOOP_EXIT]]:
// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]])
// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB3:@.*]], i32 [[TMP0]])
// IR-NEXT: ret void
//
// `omp for` stacked directly on `omp tile sizes(5,5)`: the worksharing-loop
// directive is applied to the loop nest that tiling generates from the (i, j)
// loops.
extern "C" void foo3() {
#pragma omp for
#pragma omp tile sizes(5,5)
  for (int i = 7; i < 17; i += 3)
    for (int j = 7; j < 17; j += 3)
      body(i, j);
}

// IR-LABEL: @foo4(
// IR-NEXT: entry:
446 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 447 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 448 // IR-NEXT: [[TMP1:%.*]] = alloca i32, align 4 449 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 450 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 451 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 452 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 453 // IR-NEXT: [[K:%.*]] = alloca i32, align 4 454 // IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 455 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 456 // IR-NEXT: [[J:%.*]] = alloca i32, align 4 457 // IR-NEXT: [[DOTFLOOR_1_IV_J:%.*]] = alloca i32, align 4 458 // IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 459 // IR-NEXT: [[DOTTILE_1_IV_J:%.*]] = alloca i32, align 4 460 // IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2]]) 461 // IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 462 // IR-NEXT: store i32 3, i32* [[DOTOMP_UB]], align 4 463 // IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 464 // IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 465 // IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 466 // IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 467 // IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 3 468 // IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] 469 // IR: [[COND_TRUE]]: 470 // IR-NEXT: br label %[[COND_END:.*]] 471 // IR: [[COND_FALSE]]: 472 // IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 473 // IR-NEXT: br label %[[COND_END]] 474 // IR: [[COND_END]]: 475 // IR-NEXT: [[COND:%.*]] = phi i32 [ 3, %[[COND_TRUE]] ], [ [[TMP2]], %[[COND_FALSE]] ] 476 // IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 477 // IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 478 // IR-NEXT: store i32 
[[TMP3]], i32* [[DOTOMP_IV]], align 4 479 // IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] 480 // IR: [[OMP_INNER_FOR_COND]]: 481 // IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 482 // IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 483 // IR-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] 484 // IR-NEXT: br i1 [[CMP3]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] 485 // IR: [[OMP_INNER_FOR_BODY]]: 486 // IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 487 // IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP6]], 1 488 // IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 3 489 // IR-NEXT: [[ADD:%.*]] = add nsw i32 7, [[MUL]] 490 // IR-NEXT: store i32 [[ADD]], i32* [[K]], align 4 491 // IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 492 // IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 493 // IR-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP8]], 1 494 // IR-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 1 495 // IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], [[MUL5]] 496 // IR-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 5 497 // IR-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] 498 // IR-NEXT: store i32 [[ADD7]], i32* [[DOTFLOOR_0_IV_I]], align 4 499 // IR-NEXT: store i32 7, i32* [[I]], align 4 500 // IR-NEXT: store i32 7, i32* [[J]], align 4 501 // IR-NEXT: store i32 0, i32* [[DOTFLOOR_1_IV_J]], align 4 502 // IR-NEXT: br label %[[FOR_COND:.*]] 503 // IR: [[FOR_COND]]: 504 // IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 505 // IR-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP9]], 4 506 // IR-NEXT: br i1 [[CMP8]], label %[[FOR_BODY:.*]], label %[[FOR_END38:.*]] 507 // IR: [[FOR_BODY]]: 508 // IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 509 // IR-NEXT: store i32 [[TMP10]], i32* [[DOTTILE_0_IV_I]], align 4 510 // IR-NEXT: br label %[[FOR_COND9:.*]] 511 // IR: [[FOR_COND9]]: 512 // IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 513 // IR-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 514 // IR-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 5 515 // IR-NEXT: [[CMP11:%.*]] = icmp slt i32 4, [[ADD10]] 516 // IR-NEXT: br i1 [[CMP11]], label %[[COND_TRUE12:.*]], label %[[COND_FALSE13:.*]] 517 // IR: [[COND_TRUE12]]: 518 // IR-NEXT: br label %[[COND_END15:.*]] 519 // IR: [[COND_FALSE13]]: 520 // IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 521 // IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP13]], 5 522 // IR-NEXT: br label %[[COND_END15]] 523 // IR: [[COND_END15]]: 524 // IR-NEXT: [[COND16:%.*]] = phi i32 [ 4, %[[COND_TRUE12]] ], [ [[ADD14]], %[[COND_FALSE13]] ] 525 // IR-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP11]], [[COND16]] 526 // IR-NEXT: br i1 [[CMP17]], label %[[FOR_BODY18:.*]], label %[[FOR_END35:.*]] 527 // IR: [[FOR_BODY18]]: 528 // IR-NEXT: [[TMP14:%.+]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 529 // IR-NEXT: [[MUL18:%.+]] = mul nsw i32 [[TMP14]], 3 530 // IR-NEXT: [[ADD19:%.+]] = add nsw i32 7, [[MUL18]] 531 // IR-NEXT: store i32 [[ADD19]], i32* [[I]], align 4 532 // IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 533 // IR-NEXT: store i32 [[TMP14]], i32* [[DOTTILE_1_IV_J]], align 4 534 // IR-NEXT: br label %[[FOR_COND19:.*]] 535 // IR: [[FOR_COND19]]: 536 // IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 537 // IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 538 // IR-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP16]], 5 539 // IR-NEXT: [[CMP21:%.*]] = icmp slt i32 4, [[ADD20]] 540 // IR-NEXT: br i1 [[CMP21]], label %[[COND_TRUE22:.*]], label %[[COND_FALSE23:.*]] 541 // IR: [[COND_TRUE22]]: 542 // IR-NEXT: br label %[[COND_END25:.*]] 543 // IR: [[COND_FALSE23]]: 544 // IR-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 545 // IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP17]], 5 546 // IR-NEXT: br label %[[COND_END25]] 547 // IR: [[COND_END25]]: 548 // IR-NEXT: [[COND26:%.*]] = phi i32 
[ 4, %[[COND_TRUE22]] ], [ [[ADD24]], %[[COND_FALSE23]] ] 549 // IR-NEXT: [[CMP27:%.*]] = icmp slt i32 [[TMP15]], [[COND26]] 550 // IR-NEXT: br i1 [[CMP27]], label %[[FOR_BODY28:.*]], label %[[FOR_END:.*]] 551 // IR: [[FOR_BODY28]]: 552 // IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 553 // IR-NEXT: [[MUL31:%.*]] = mul nsw i32 [[TMP19]], 3 554 // IR-NEXT: [[ADD32:%.*]] = add nsw i32 7, [[MUL31]] 555 // IR-NEXT: store i32 [[ADD32]], i32* [[J]], align 4 556 // IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 557 // IR-NEXT: [[TMP21:%.*]] = load i32, i32* [[J]], align 4 558 // IR-NEXT: call void (...) @body(i32 [[TMP20]], i32 [[TMP21]]) 559 // IR-NEXT: br label %[[FOR_INC:.*]] 560 // IR: [[FOR_INC]]: 561 // IR-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTTILE_1_IV_J]], align 4 562 // IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 563 // IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_1_IV_J]], align 4 564 // IR-NEXT: br label %[[FOR_COND19]] 565 // IR: [[FOR_END]]: 566 // IR-NEXT: br label %[[FOR_INC33:.*]] 567 // IR: [[FOR_INC33]]: 568 // IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 569 // IR-NEXT: [[INC34:%.*]] = add nsw i32 [[TMP23]], 1 570 // IR-NEXT: store i32 [[INC34]], i32* [[DOTTILE_0_IV_I]], align 4 571 // IR-NEXT: br label %[[FOR_COND9]] 572 // IR: [[FOR_END35]]: 573 // IR-NEXT: br label %[[FOR_INC36:.*]] 574 // IR: [[FOR_INC36]]: 575 // IR-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTFLOOR_1_IV_J]], align 4 576 // IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP24]], 5 577 // IR-NEXT: store i32 [[ADD37]], i32* [[DOTFLOOR_1_IV_J]], align 4 578 // IR-NEXT: br label %[[FOR_COND]] 579 // IR: [[FOR_END38]]: 580 // IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] 581 // IR: [[OMP_BODY_CONTINUE]]: 582 // IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] 583 // IR: [[OMP_INNER_FOR_INC]]: 584 // IR-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 585 // IR-NEXT: [[ADD39:%.*]] = add nsw i32 [[TMP25]], 1 586 // IR-NEXT: store i32 
[[ADD39]], i32* [[DOTOMP_IV]], align 4
// IR-NEXT: br label %[[OMP_INNER_FOR_COND]]
// IR: [[OMP_INNER_FOR_END]]:
// IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]]
// IR: [[OMP_LOOP_EXIT]]:
// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]])
// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB3]], i32 [[TMP0]])
// IR-NEXT: ret void
//
// `omp for collapse(2)` whose second associated loop is not written in the
// source: it comes from the loop nest generated by `omp tile sizes(5,5)` on
// the (i, j) loops nested inside the `k` loop. The body does not use `k`.
extern "C" void foo4() {
#pragma omp for collapse(2)
  for (int k = 7; k < 17; k += 3)
#pragma omp tile sizes(5,5)
    for (int i = 7; i < 17; i += 3)
      for (int j = 7; j < 17; j += 3)
        body(i, j);
}


// IR-LABEL: @foo5(
// IR-NEXT: entry:
// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP1:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP2:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8
// IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTFLOOR_0_IV_I10:%.*]] = alloca i32, align 4
// IR-NEXT: [[DOTTILE_0_IV_I11:%.*]] = alloca i32, align 4
// IR-NEXT: [[J15:%.*]] = alloca i32, align 4
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[GLOB2]])
// IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
// IR-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4
// IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP]],
align 4 629 // IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 630 // IR-NEXT: [[CMP:%.*]] = icmp slt i32 4, [[ADD]] 631 // IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] 632 // IR: [[COND_TRUE]]: 633 // IR-NEXT: br label %[[COND_END:.*]] 634 // IR: [[COND_FALSE]]: 635 // IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP]], align 4 636 // IR-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 5 637 // IR-NEXT: br label %[[COND_END]] 638 // IR: [[COND_END]]: 639 // IR-NEXT: [[COND:%.*]] = phi i32 [ 4, %[[COND_TRUE]] ], [ [[ADD4]], %[[COND_FALSE]] ] 640 // IR-NEXT: store i32 [[COND]], i32* [[DOTCAPTURE_EXPR_3]], align 4 641 // IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 642 // IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 643 // IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], [[TMP5]] 644 // IR-NEXT: [[SUB6:%.*]] = sub i32 [[SUB]], 1 645 // IR-NEXT: [[ADD7:%.*]] = add i32 [[SUB6]], 1 646 // IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD7]], 1 647 // IR-NEXT: [[CONV:%.*]] = zext i32 [[DIV]] to i64 648 // IR-NEXT: [[MUL:%.*]] = mul nsw i64 1, [[CONV]] 649 // IR-NEXT: [[MUL8:%.*]] = mul nsw i64 [[MUL]], 4 650 // IR-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL8]], 1 651 // IR-NEXT: store i64 [[SUB9]], i64* [[DOTCAPTURE_EXPR_5]], align 8 652 // IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 653 // IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 654 // IR-NEXT: store i32 [[TMP6]], i32* [[DOTTILE_0_IV_I]], align 4 655 // IR-NEXT: store i32 7, i32* [[J]], align 4 656 // IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 657 // IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 658 // IR-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] 659 // IR-NEXT: br i1 [[CMP12]], label %[[OMP_PRECOND_THEN:.*]], label %[[OMP_PRECOND_END:.*]] 660 // IR: [[OMP_PRECOND_THEN]]: 661 // IR-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 662 // IR-NEXT: [[TMP9:%.*]] = load i64, i64* 
[[DOTCAPTURE_EXPR_5]], align 8 663 // IR-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 664 // IR-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 665 // IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 666 // IR-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* [[GLOB1]], i32 [[TMP0]], i32 34, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) 667 // IR-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 668 // IR-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 669 // IR-NEXT: [[CMP16:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] 670 // IR-NEXT: br i1 [[CMP16]], label %[[COND_TRUE17:.*]], label %[[COND_FALSE18:.*]] 671 // IR: [[COND_TRUE17]]: 672 // IR-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_5]], align 8 673 // IR-NEXT: br label %[[COND_END19:.*]] 674 // IR: [[COND_FALSE18]]: 675 // IR-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 676 // IR-NEXT: br label %[[COND_END19]] 677 // IR: [[COND_END19]]: 678 // IR-NEXT: [[COND20:%.*]] = phi i64 [ [[TMP12]], %[[COND_TRUE17]] ], [ [[TMP13]], %[[COND_FALSE18]] ] 679 // IR-NEXT: store i64 [[COND20]], i64* [[DOTOMP_UB]], align 8 680 // IR-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 681 // IR-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 682 // IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] 683 // IR: [[OMP_INNER_FOR_COND]]: 684 // IR-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 685 // IR-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 686 // IR-NEXT: [[CMP21:%.*]] = icmp sle i64 [[TMP15]], [[TMP16]] 687 // IR-NEXT: br i1 [[CMP21]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] 688 // IR: [[OMP_INNER_FOR_BODY]]: 689 // IR-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 690 // IR-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 691 // IR-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 
4 692 // IR-NEXT: [[SUB22:%.*]] = sub i32 [[TMP18]], [[TMP19]] 693 // IR-NEXT: [[SUB23:%.*]] = sub i32 [[SUB22]], 1 694 // IR-NEXT: [[ADD24:%.*]] = add i32 [[SUB23]], 1 695 // IR-NEXT: [[DIV25:%.*]] = udiv i32 [[ADD24]], 1 696 // IR-NEXT: [[MUL26:%.*]] = mul i32 1, [[DIV25]] 697 // IR-NEXT: [[MUL27:%.*]] = mul i32 [[MUL26]], 4 698 // IR-NEXT: [[CONV28:%.*]] = zext i32 [[MUL27]] to i64 699 // IR-NEXT: [[DIV29:%.*]] = sdiv i64 [[TMP17]], [[CONV28]] 700 // IR-NEXT: [[MUL30:%.*]] = mul nsw i64 [[DIV29]], 5 701 // IR-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] 702 // IR-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 703 // IR-NEXT: store i32 [[CONV32]], i32* [[DOTFLOOR_0_IV_I10]], align 4 704 // IR-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 705 // IR-NEXT: [[CONV33:%.*]] = sext i32 [[TMP20]] to i64 706 // IR-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 707 // IR-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 708 // IR-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 709 // IR-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 710 // IR-NEXT: [[SUB34:%.*]] = sub i32 [[TMP23]], [[TMP24]] 711 // IR-NEXT: [[SUB35:%.*]] = sub i32 [[SUB34]], 1 712 // IR-NEXT: [[ADD36:%.*]] = add i32 [[SUB35]], 1 713 // IR-NEXT: [[DIV37:%.*]] = udiv i32 [[ADD36]], 1 714 // IR-NEXT: [[MUL38:%.*]] = mul i32 1, [[DIV37]] 715 // IR-NEXT: [[MUL39:%.*]] = mul i32 [[MUL38]], 4 716 // IR-NEXT: [[CONV40:%.*]] = zext i32 [[MUL39]] to i64 717 // IR-NEXT: [[DIV41:%.*]] = sdiv i64 [[TMP22]], [[CONV40]] 718 // IR-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 719 // IR-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 720 // IR-NEXT: [[SUB42:%.*]] = sub i32 [[TMP25]], [[TMP26]] 721 // IR-NEXT: [[SUB43:%.*]] = sub i32 [[SUB42]], 1 722 // IR-NEXT: [[ADD44:%.*]] = add i32 [[SUB43]], 1 723 // IR-NEXT: [[DIV45:%.*]] = udiv i32 [[ADD44]], 1 724 // IR-NEXT: [[MUL46:%.*]] = mul i32 1, 
[[DIV45]] 725 // IR-NEXT: [[MUL47:%.*]] = mul i32 [[MUL46]], 4 726 // IR-NEXT: [[CONV48:%.*]] = zext i32 [[MUL47]] to i64 727 // IR-NEXT: [[MUL49:%.*]] = mul nsw i64 [[DIV41]], [[CONV48]] 728 // IR-NEXT: [[SUB50:%.*]] = sub nsw i64 [[TMP21]], [[MUL49]] 729 // IR-NEXT: [[DIV51:%.*]] = sdiv i64 [[SUB50]], 4 730 // IR-NEXT: [[MUL52:%.*]] = mul nsw i64 [[DIV51]], 1 731 // IR-NEXT: [[ADD53:%.*]] = add nsw i64 [[CONV33]], [[MUL52]] 732 // IR-NEXT: [[CONV54:%.*]] = trunc i64 [[ADD53]] to i32 733 // IR-NEXT: store i32 [[CONV54]], i32* [[DOTTILE_0_IV_I11]], align 4 734 // IR-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 735 // IR-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 736 // IR-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 737 // IR-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 738 // IR-NEXT: [[SUB55:%.*]] = sub i32 [[TMP29]], [[TMP30]] 739 // IR-NEXT: [[SUB56:%.*]] = sub i32 [[SUB55]], 1 740 // IR-NEXT: [[ADD57:%.*]] = add i32 [[SUB56]], 1 741 // IR-NEXT: [[DIV58:%.*]] = udiv i32 [[ADD57]], 1 742 // IR-NEXT: [[MUL59:%.*]] = mul i32 1, [[DIV58]] 743 // IR-NEXT: [[MUL60:%.*]] = mul i32 [[MUL59]], 4 744 // IR-NEXT: [[CONV61:%.*]] = zext i32 [[MUL60]] to i64 745 // IR-NEXT: [[DIV62:%.*]] = sdiv i64 [[TMP28]], [[CONV61]] 746 // IR-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 747 // IR-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 748 // IR-NEXT: [[SUB63:%.*]] = sub i32 [[TMP31]], [[TMP32]] 749 // IR-NEXT: [[SUB64:%.*]] = sub i32 [[SUB63]], 1 750 // IR-NEXT: [[ADD65:%.*]] = add i32 [[SUB64]], 1 751 // IR-NEXT: [[DIV66:%.*]] = udiv i32 [[ADD65]], 1 752 // IR-NEXT: [[MUL67:%.*]] = mul i32 1, [[DIV66]] 753 // IR-NEXT: [[MUL68:%.*]] = mul i32 [[MUL67]], 4 754 // IR-NEXT: [[CONV69:%.*]] = zext i32 [[MUL68]] to i64 755 // IR-NEXT: [[MUL70:%.*]] = mul nsw i64 [[DIV62]], [[CONV69]] 756 // IR-NEXT: [[SUB71:%.*]] = sub nsw i64 [[TMP27]], [[MUL70]] 757 // IR-NEXT: 
[[TMP33:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 758 // IR-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 759 // IR-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 760 // IR-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 761 // IR-NEXT: [[SUB72:%.*]] = sub i32 [[TMP35]], [[TMP36]] 762 // IR-NEXT: [[SUB73:%.*]] = sub i32 [[SUB72]], 1 763 // IR-NEXT: [[ADD74:%.*]] = add i32 [[SUB73]], 1 764 // IR-NEXT: [[DIV75:%.*]] = udiv i32 [[ADD74]], 1 765 // IR-NEXT: [[MUL76:%.*]] = mul i32 1, [[DIV75]] 766 // IR-NEXT: [[MUL77:%.*]] = mul i32 [[MUL76]], 4 767 // IR-NEXT: [[CONV78:%.*]] = zext i32 [[MUL77]] to i64 768 // IR-NEXT: [[DIV79:%.*]] = sdiv i64 [[TMP34]], [[CONV78]] 769 // IR-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 770 // IR-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 771 // IR-NEXT: [[SUB80:%.*]] = sub i32 [[TMP37]], [[TMP38]] 772 // IR-NEXT: [[SUB81:%.*]] = sub i32 [[SUB80]], 1 773 // IR-NEXT: [[ADD82:%.*]] = add i32 [[SUB81]], 1 774 // IR-NEXT: [[DIV83:%.*]] = udiv i32 [[ADD82]], 1 775 // IR-NEXT: [[MUL84:%.*]] = mul i32 1, [[DIV83]] 776 // IR-NEXT: [[MUL85:%.*]] = mul i32 [[MUL84]], 4 777 // IR-NEXT: [[CONV86:%.*]] = zext i32 [[MUL85]] to i64 778 // IR-NEXT: [[MUL87:%.*]] = mul nsw i64 [[DIV79]], [[CONV86]] 779 // IR-NEXT: [[SUB88:%.*]] = sub nsw i64 [[TMP33]], [[MUL87]] 780 // IR-NEXT: [[DIV89:%.*]] = sdiv i64 [[SUB88]], 4 781 // IR-NEXT: [[MUL90:%.*]] = mul nsw i64 [[DIV89]], 4 782 // IR-NEXT: [[SUB91:%.*]] = sub nsw i64 [[SUB71]], [[MUL90]] 783 // IR-NEXT: [[MUL92:%.*]] = mul nsw i64 [[SUB91]], 3 784 // IR-NEXT: [[ADD93:%.*]] = add nsw i64 7, [[MUL92]] 785 // IR-NEXT: [[CONV94:%.*]] = trunc i64 [[ADD93]] to i32 786 // IR-NEXT: store i32 [[CONV94]], i32* [[J15]], align 4 787 // IR-NEXT: store i32 7, i32* [[I]], align 4 788 // IR-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTTILE_0_IV_I11]], align 4 789 // IR-NEXT: [[MUL95:%.*]] = mul nsw i32 [[TMP39]], 3 790 // 
IR-NEXT: [[ADD96:%.*]] = add nsw i32 7, [[MUL95]] 791 // IR-NEXT: store i32 [[ADD96]], i32* [[I]], align 4 792 // IR-NEXT: [[TMP40:%.*]] = load i32, i32* [[I]], align 4 793 // IR-NEXT: [[TMP41:%.*]] = load i32, i32* [[J15]], align 4 794 // IR-NEXT: call void (...) @body(i32 [[TMP40]], i32 [[TMP41]]) 795 // IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] 796 // IR: [[OMP_BODY_CONTINUE]]: 797 // IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] 798 // IR: [[OMP_INNER_FOR_INC]]: 799 // IR-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 800 // IR-NEXT: [[ADD97:%.*]] = add nsw i64 [[TMP42]], 1 801 // IR-NEXT: store i64 [[ADD97]], i64* [[DOTOMP_IV]], align 8 802 // IR-NEXT: br label %[[OMP_INNER_FOR_COND]] 803 // IR: [[OMP_INNER_FOR_END]]: 804 // IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]] 805 // IR: [[OMP_LOOP_EXIT]]: 806 // IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP0]]) 807 // IR-NEXT: br label %[[OMP_PRECOND_END]] 808 // IR: [[OMP_PRECOND_END]]: 809 // IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* [[GLOB3]], i32 [[TMP0]]) 810 // IR-NEXT: ret void 811 // 812 extern "C" void foo5() { 813 #pragma omp for collapse(3) 814 #pragma omp tile sizes(5) 815 for (int i = 7; i < 17; i += 3) 816 for (int j = 7; j < 17; j += 3) 817 body(i, j); 818 } 819 820 821 // IR-LABEL: @foo6( 822 // IR-NEXT: entry: 823 // IR-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. 
to void (i32*, i32*, ...)*)) 824 // IR-NEXT: ret void 825 // 826 // IR-LABEL: @.omp_outlined.( 827 // IR-NEXT: entry: 828 // IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 829 // IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 830 // IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 831 // IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 832 // IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 833 // IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 834 // IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 835 // IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 836 // IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 837 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 838 // IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 839 // IR-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 840 // IR-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8 841 // IR-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 842 // IR-NEXT: store i32 0, i32* [[DOTOMP_UB]], align 4 843 // IR-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 844 // IR-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 845 // IR-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 846 // IR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 847 // IR-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* [[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 848 // IR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 849 // IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 850 // IR-NEXT: br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] 851 // IR: [[COND_TRUE]]: 852 // IR-NEXT: br label %[[COND_END:.*]] 853 // IR: [[COND_FALSE]]: 854 // IR-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 855 // IR-NEXT: br label %[[COND_END]] 856 // IR: [[COND_END]]: 857 // IR-NEXT: [[COND:%.*]] = phi i32 [ 
0, %[[COND_TRUE]] ], [ [[TMP3]], %[[COND_FALSE]] ] 858 // IR-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 859 // IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 860 // IR-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 861 // IR-NEXT: br label %[[OMP_INNER_FOR_COND:.*]] 862 // IR: [[OMP_INNER_FOR_COND]]: 863 // IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 864 // IR-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 865 // IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 866 // IR-NEXT: br i1 [[CMP2]], label %[[OMP_INNER_FOR_BODY:.*]], label %[[OMP_INNER_FOR_END:.*]] 867 // IR: [[OMP_INNER_FOR_BODY]]: 868 // IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 869 // IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 870 // IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 871 // IR-NEXT: store i32 [[ADD]], i32* [[DOTFLOOR_0_IV_I]], align 4 872 // IR-NEXT: store i32 7, i32* [[I]], align 4 873 // IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 874 // IR-NEXT: store i32 [[TMP8]], i32* [[DOTTILE_0_IV_I]], align 4 875 // IR-NEXT: br label %[[FOR_COND:.*]] 876 // IR: [[FOR_COND]]: 877 // IR-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 878 // IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 879 // IR-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 5 880 // IR-NEXT: [[CMP4:%.*]] = icmp slt i32 4, [[ADD3]] 881 // IR-NEXT: br i1 [[CMP4]], label %[[COND_TRUE5:.*]], label %[[COND_FALSE6:.*]] 882 // IR: [[COND_TRUE5]]: 883 // IR-NEXT: br label %[[COND_END8:.*]] 884 // IR: [[COND_FALSE6]]: 885 // IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 886 // IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 5 887 // IR-NEXT: br label %[[COND_END8]] 888 // IR: [[COND_END8]]: 889 // IR-NEXT: [[COND9:%.*]] = phi i32 [ 4, %[[COND_TRUE5]] ], [ [[ADD7]], %[[COND_FALSE6]] ] 890 // IR-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP9]], [[COND9]] 891 // IR-NEXT: br i1 
[[CMP10]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]] 892 // IR: [[FOR_BODY]]: 893 // IR-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 894 // IR-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP12]], 3 895 // IR-NEXT: [[ADD12:%.*]] = add nsw i32 7, [[MUL11]] 896 // IR-NEXT: store i32 [[ADD12]], i32* [[I]], align 4 897 // IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 898 // IR-NEXT: call void (...) @body(i32 [[TMP13]]) 899 // IR-NEXT: br label %[[FOR_INC:.*]] 900 // IR: [[FOR_INC]]: 901 // IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 902 // IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 903 // IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 904 // IR-NEXT: br label %[[FOR_COND]] 905 // IR: [[FOR_END]]: 906 // IR-NEXT: br label %[[OMP_BODY_CONTINUE:.*]] 907 // IR: [[OMP_BODY_CONTINUE]]: 908 // IR-NEXT: br label %[[OMP_INNER_FOR_INC:.*]] 909 // IR: [[OMP_INNER_FOR_INC]]: 910 // IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 911 // IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], 1 912 // IR-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 913 // IR-NEXT: br label %[[OMP_INNER_FOR_COND]] 914 // IR: [[OMP_INNER_FOR_END]]: 915 // IR-NEXT: br label %[[OMP_LOOP_EXIT:.*]] 916 // IR: [[OMP_LOOP_EXIT]]: 917 // IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* [[GLOB1]], i32 [[TMP1]]) 918 // IR-NEXT: ret void 919 // 920 extern "C" void foo6() { 921 #pragma omp parallel for 922 #pragma omp tile sizes(5) 923 for (int i = 7; i < 17; i += 3) 924 body(i); 925 } 926 927 928 template<typename T, T Step, T Tile> 929 void foo7(T start, T end) { 930 #pragma omp tile sizes(Tile) 931 for (T i = start; i < end; i += Step) 932 body(i); 933 } 934 935 // IR-LABEL: define {{.*}}void @tfoo7( 936 // IR-NEXT: entry: 937 // IR-NEXT: call void @_Z4foo7IiLi3ELi5EEvT_S0_(i32 0, i32 42) 938 // IR-NEXT: ret void 939 // 940 // IR-LABEL: define linkonce_odr void @_Z4foo7IiLi3ELi5EEvT_S0_( 941 // IR-NEXT: entry: 942 
// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 943 // IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 944 // IR-NEXT: [[CAPTURE_EXPR:%.+]] = alloca i32, align 4 945 // IR-NEXT: [[CAPTURE_EXPR1:%.+]] = alloca i32, align 4 946 // IR-NEXT: [[CAPTURE_EXPR2:%.+]] = alloca i32, align 4 947 // IR-NEXT: [[I:%.*]] = alloca i32, align 4 948 // IR-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4 949 // IR-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 950 // IR-NEXT: store i32 [[START:%.*]], i32* [[START_ADDR]], align 4 951 // IR-NEXT: store i32 [[END:%.*]], i32* [[END_ADDR]], align 4 952 // IR-NEXT: [[TMP0:%.+]] = load i32, i32* [[START_ADDR]], align 4 953 // IR-NEXT: store i32 [[TMP0]], i32* [[CAPTURE_EXPR]], align 4 954 // IR-NEXT: [[TMP1:%.+]] = load i32, i32* [[END_ADDR]], align 4 955 // IR-NEXT: store i32 [[TMP1]], i32* [[CAPTURE_EXPR1]], align 4 956 // IR-NEXT: [[TMP2:%.+]] = load i32, i32* [[CAPTURE_EXPR1]], align 4 957 // IR-NEXT: [[TMP3:%.+]] = load i32, i32* [[CAPTURE_EXPR]], align 4 958 // IR-NEXT: [[SUB:%.+]] = sub i32 [[TMP2]], [[TMP3]] 959 // IR-NEXT: [[SUB3:%.+]] = sub i32 [[SUB]], 1 960 // IR-NEXT: [[ADD:%.+]] = add i32 [[SUB3]], 3 961 // IR-NEXT: [[DIV:%.+]] = udiv i32 [[ADD]], 3 962 // IR-NEXT: [[SUB4:%.+]] = sub i32 [[DIV]], 1 963 // IR-NEXT: store i32 [[SUB4]], i32* [[CAPTURE_EXPR2]], align 4 964 // IR-NEXT: [[TMP4:%.+]] = load i32, i32* [[START_ADDR]], align 4 965 // IR-NEXT: store i32 [[TMP4]], i32* [[I]], align 4 966 // IR-NEXT: store i32 0, i32* [[DOTFLOOR_0_IV_I]], align 4 967 // IR-NEXT: br label %[[FOR_COND:.*]] 968 // IR: [[FOR_COND]]: 969 // IR-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 970 // IR-NEXT: [[TMP6:%.+]] = load i32, i32* [[CAPTURE_EXPR2]], align 4 971 // IR-NEXT: [[ADD3:%.*]] = add i32 [[TMP6]], 1 972 // IR-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[ADD3]] 973 // IR-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END25:.*]] 974 // IR: [[FOR_BODY]]: 975 // IR-NEXT: [[TMP3:%.*]] = load i32, 
i32* [[DOTFLOOR_0_IV_I]], align 4 976 // IR-NEXT: store i32 [[TMP3]], i32* [[DOTTILE_0_IV_I]], align 4 977 // IR-NEXT: br label %[[FOR_COND4:.*]] 978 // IR: [[FOR_COND4]]: 979 // IR-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 980 // IR-NEXT: [[TMP5:%.*]] = load i32, i32* [[CAPTURE_EXPR2]], align 4 981 // IR-NEXT: [[ADD10:%.*]] = add i32 [[TMP5]], 1 982 // IR-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 983 // IR-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP7]], 5 984 // IR-NEXT: [[CMP12:%.*]] = icmp ult i32 [[ADD10]], [[ADD11]] 985 // IR-NEXT: br i1 [[CMP12]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]] 986 // IR: [[COND_TRUE]]: 987 // IR-NEXT: [[TMP8:%.*]] = load i32, i32* [[CAPTURE_EXPR2]], align 4 988 // IR-NEXT: [[ADD18:%.*]] = add i32 [[TMP8]], 1 989 // IR-NEXT: br label %[[COND_END:.*]] 990 // IR: [[COND_FALSE]]: 991 // IR-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 992 // IR-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP10]], 5 993 // IR-NEXT: br label %[[COND_END]] 994 // IR: [[COND_END]]: 995 // IR-NEXT: [[COND:%.*]] = phi i32 [ [[ADD18]], %[[COND_TRUE]] ], [ [[ADD19]], %[[COND_FALSE]] ] 996 // IR-NEXT: [[CMP20:%.*]] = icmp ult i32 [[TMP4]], [[COND]] 997 // IR-NEXT: br i1 [[CMP20]], label %[[FOR_BODY21:.*]], label %[[FOR_END:.*]] 998 // IR: [[FOR_BODY21]]: 999 // IR-NEXT: [[TMP11:%.*]] = load i32, i32* [[CAPTURE_EXPR]], align 4 1000 // IR-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 1001 // IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 3 1002 // IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP11]], [[MUL]] 1003 // IR-NEXT: store i32 [[ADD22]], i32* [[I]], align 4 1004 // IR-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 1005 // IR-NEXT: call void (...) 
@body(i32 [[TMP14]]) 1006 // IR-NEXT: br label %[[FOR_INC:.*]] 1007 // IR: [[FOR_INC]]: 1008 // IR-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTTILE_0_IV_I]], align 4 1009 // IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 1010 // IR-NEXT: store i32 [[INC]], i32* [[DOTTILE_0_IV_I]], align 4 1011 // IR-NEXT: br label %[[FOR_COND4]] 1012 // IR: [[FOR_END]]: 1013 // IR-NEXT: br label %[[FOR_INC23:.*]] 1014 // IR: [[FOR_INC23]]: 1015 // IR-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTFLOOR_0_IV_I]], align 4 1016 // IR-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP16]], 5 1017 // IR-NEXT: store i32 [[ADD24]], i32* [[DOTFLOOR_0_IV_I]], align 4 1018 // IR-NEXT: br label %[[FOR_COND]] 1019 // IR: [[FOR_END25]]: 1020 // IR-NEXT: ret void 1021 // 1022 extern "C" void tfoo7() { 1023 foo7<int,3,5>(0, 42); 1024 } 1025 1026 #endif /* HEADER */ 1027