; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-cond-stores-vec -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

%pair = type { i32, i32 }

; CHECK-LABEL: consecutive_ptr_forward
;
; Check that a forward consecutive pointer is recognized as uniform and remains
; uniform after vectorization.
;
; CHECK: LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK-NOT: getelementptr
; CHECK: getelementptr inbounds i32, i32* %a, i64 %index
; CHECK-NOT: getelementptr
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @consecutive_ptr_forward(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
  %tmp2 = load i32, i32* %tmp1, align 8
  %tmp3 = add i32 %tmp0, %tmp2
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp4 = phi i32 [ %tmp3, %for.body ]
  ret i32 %tmp4
}

; CHECK-LABEL: consecutive_ptr_reverse
;
; Check that a reverse consecutive pointer is recognized as uniform and remains
; uniform after vectorization.
;
; CHECK: LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = sub i64 %n, %index
; CHECK-NOT: getelementptr
; CHECK: %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 %offset.idx
; CHECK: getelementptr i32, i32* %[[G0]], i64 -3
; CHECK-NOT: getelementptr
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @consecutive_ptr_reverse(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ %n, %entry ]
  %tmp0 = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
  %tmp2 = load i32, i32* %tmp1, align 8
  %tmp3 = add i32 %tmp0, %tmp2
  %i.next = add nuw nsw i64 %i, -1
  %cond = icmp sgt i64 %i.next, 0
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp4 = phi i32 [ %tmp3, %for.body ]
  ret i32 %tmp4
}

; CHECK-LABEL: interleaved_access_forward
; INTER-LABEL: interleaved_access_forward
;
; Check that a consecutive-like pointer used by a forward interleaved group is
; recognized as uniform and remains uniform after vectorization. When
; interleaved memory accesses aren't enabled, the pointer should not be
; recognized as uniform, and it should not be uniform after vectorization.
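;
; A rough C equivalent of the loop below, included for illustration only (the
; struct and variable names are assumptions, not part of the test):
;
;   struct pair { int x, y; };
;
;   int interleaved_access_forward(struct pair *p, long n) {
;     int sum = 0;
;     for (long i = 0; i < n; ++i)
;       sum += p[i].x + p[i].y;  /* loads both fields of each pair */
;     return sum;
;   }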
;
; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; CHECK-NOT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %[[I1:.+]] = or i64 %index, 1
; CHECK: %[[I2:.+]] = or i64 %index, 2
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %index, i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 1
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
; INTER: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; INTER: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTER-NOT: getelementptr
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
; INTER-NOT: getelementptr
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @interleaved_access_forward(%pair* %p, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = phi i32 [ %tmp6, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %tmp3 = load i32, i32* %tmp1, align 8
  %tmp4 = load i32, i32* %tmp2, align 8
  %tmp5 = add i32 %tmp3, %tmp4
  %tmp6 = add i32 %tmp0, %tmp5
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp14 = phi i32 [ %tmp6, %for.body ]
  ret i32 %tmp14
}

; CHECK-LABEL: interleaved_access_reverse
; INTER-LABEL: interleaved_access_reverse
;
; Check that a consecutive-like pointer used by a reverse interleaved group is
; recognized as uniform and remains uniform after vectorization. When
; interleaved memory accesses aren't enabled, the pointer should not be
; recognized as uniform, and it should not be uniform after vectorization.
;
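; A rough C equivalent of the loop below, included for illustration only
; (names are assumptions, not part of the test):
;
;   struct pair { int x, y; };
;
;   int interleaved_access_reverse(struct pair *p, long n) {
;     int sum = 0;
;     for (long i = n; i > 0; --i)  /* counts down from n to 1 */
;       sum += p[i].x + p[i].y;
;     return sum;
;   }
;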
; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; CHECK-NOT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = sub i64 %n, %index
; CHECK: %[[I1:.+]] = add i64 %offset.idx, -1
; CHECK: %[[I2:.+]] = add i64 %offset.idx, -2
; CHECK: %[[I3:.+]] = add i64 %offset.idx, -3
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 1
; CHECK: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 1
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
; INTER: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; INTER: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTER: %offset.idx = sub i64 %n, %index
; INTER-NOT: getelementptr
; INTER: %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 0
; INTER: getelementptr i32, i32* %[[G0]], i64 -6
; INTER-NOT: getelementptr
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @interleaved_access_reverse(%pair* %p, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ %n, %entry ]
  %tmp0 = phi i32 [ %tmp6, %for.body ], [ 0, %entry ]
  %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
  %tmp3 = load i32, i32* %tmp1, align 8
  %tmp4 = load i32, i32* %tmp2, align 8
  %tmp5 = add i32 %tmp3, %tmp4
  %tmp6 = add i32 %tmp0, %tmp5
  %i.next = add nuw nsw i64 %i, -1
  %cond = icmp sgt i64 %i.next, 0
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp14 = phi i32 [ %tmp6, %for.body ]
  ret i32 %tmp14
}

; INTER-LABEL: predicated_store
;
; Check that a consecutive-like pointer used by a forward interleaved group and
; scalarized store is not recognized as uniform and is not uniform after
; vectorization. The store is scalarized because it's in a predicated block.
; Even though the load in this example is vectorized and only uses the pointer
; as if it were uniform, the store is scalarized, making the pointer
; non-uniform.
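;
; A rough C equivalent of the predicated loop below, included for illustration
; only (names are assumptions, not part of the test):
;
;   struct pair { int x, y; };
;
;   void predicated_store(struct pair *p, int x, long n) {
;     for (long i = 0; i < n; ++i) {
;       int v = p[i].x;
;       if (v == x)
;         p[i].x = v;  /* the store executes only under the predicate */
;     }
;   }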
;
; INTER-NOT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, {{.*}} ]
; INTER: %[[I1:.+]] = or i64 %index, 1
; INTER: %[[I2:.+]] = or i64 %index, 2
; INTER: %[[I3:.+]] = or i64 %index, 3
; INTER: %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
; INTER: getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
; INTER: %[[B0:.+]] = bitcast i32* %[[G0]] to <8 x i32>*
; INTER: %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 8
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @predicated_store(%pair *%p, i32 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
  %tmp1 = load i32, i32* %tmp0, align 8
  %tmp2 = icmp eq i32 %tmp1, %x
  br i1 %tmp2, label %if.then, label %if.merge

if.then:
  store i32 %tmp1, i32* %tmp0, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: irregular_type
;
; Check that a consecutive pointer used by a scalarized store is not recognized
; as uniform and is not uniform after vectorization. The store is scalarized
; because the stored type may require padding.
;
; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %i
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %[[I1:.+]] = or i64 %index, 1
; CHECK: %[[I2:.+]] = or i64 %index, 2
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %index
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I1]]
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I2]]
; CHECK: getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I3]]
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @irregular_type(x86_fp80* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %tmp0 = sitofp i32 1 to x86_fp80
  %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %i
  store x86_fp80 %tmp0, x86_fp80* %tmp1, align 16
  %i.next = add i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: pointer_iv_uniform
;
; Check that a pointer induction variable is recognized as uniform and remains
; uniform after vectorization.
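;
; A rough C equivalent of the loop below, included for illustration only
; (names are assumptions, not part of the test):
;
;   void pointer_iv_uniform(int *a, int x, long n) {
;     for (long i = 0; i < n; ++i)
;       *a++ = x;  /* the pointer induction variable advances by one */
;   }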
;
; CHECK: LV: Found uniform instruction: %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK-NOT: getelementptr
; CHECK: %next.gep = getelementptr i32, i32* %a, i64 %index
; CHECK-NOT: getelementptr
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @pointer_iv_uniform(i32* %a, i32 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
  store i32 %x, i32* %p, align 8
  %tmp03 = getelementptr inbounds i32, i32* %p, i32 1
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; INTER-LABEL: pointer_iv_non_uniform_0
;
; Check that a pointer induction variable with a non-uniform user is not
; recognized as uniform and is not uniform after vectorization. The pointer
; induction variable is used by getelementptr instructions that are non-uniform
; due to scalarization of the stores.
;
; INTER-NOT: LV: Found uniform instruction: %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
; INTER: vector.body
; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTER: %[[I0:.+]] = shl i64 %index, 2
; INTER: %next.gep = getelementptr i32, i32* %a, i64 %[[I0]]
; INTER: %[[S1:.+]] = shl i64 %index, 2
; INTER: %[[I1:.+]] = or i64 %[[S1]], 4
; INTER: %next.gep2 = getelementptr i32, i32* %a, i64 %[[I1]]
; INTER: %[[S2:.+]] = shl i64 %index, 2
; INTER: %[[I2:.+]] = or i64 %[[S2]], 8
; INTER: %next.gep3 = getelementptr i32, i32* %a, i64 %[[I2]]
; INTER: %[[S3:.+]] = shl i64 %index, 2
; INTER: %[[I3:.+]] = or i64 %[[S3]], 12
; INTER: %next.gep4 = getelementptr i32, i32* %a, i64 %[[I3]]
; INTER: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @pointer_iv_non_uniform_0(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
  %tmp00 = load i32, i32* %p, align 8
  %tmp01 = getelementptr inbounds i32, i32* %p, i32 1
  %tmp02 = load i32, i32* %tmp01, align 8
  %tmp03 = getelementptr inbounds i32, i32* %p, i32 4
  %tmp04 = load i32, i32* %tmp03, align 8
  %tmp05 = getelementptr inbounds i32, i32* %p, i32 5
  %tmp06 = load i32, i32* %tmp05, align 8
  %tmp07 = sub i32 %tmp04, %tmp00
  %tmp08 = sub i32 %tmp02, %tmp02
  %tmp09 = getelementptr inbounds i32, i32* %p, i32 2
  store i32 %tmp07, i32* %tmp09, align 8
  %tmp10 = getelementptr inbounds i32, i32* %p, i32 3
  store i32 %tmp08, i32* %tmp10, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: pointer_iv_non_uniform_1
;
; Check that a pointer induction variable with a non-uniform user is not
; recognized as uniform and is not uniform after vectorization. The pointer
; induction variable is used by a store that will be scalarized.
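;
; A rough C equivalent of the loop below, included for illustration only
; (names are assumptions, not part of the test; x86_fp80 corresponds to
; long double on x86):
;
;   void pointer_iv_non_uniform_1(long double *a, long n) {
;     for (long i = 0; i < n; ++i)
;       *a++ = 1.0L;  /* the store of the 80-bit value is scalarized */
;   }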
;
; CHECK-NOT: LV: Found uniform instruction: %p = phi x86_fp80* [%tmp1, %for.body], [%a, %entry]
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %next.gep = getelementptr x86_fp80, x86_fp80* %a, i64 %index
; CHECK: %[[I1:.+]] = or i64 %index, 1
; CHECK: %next.gep2 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I1]]
; CHECK: %[[I2:.+]] = or i64 %index, 2
; CHECK: %next.gep3 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I2]]
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: %next.gep4 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I3]]
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @pointer_iv_non_uniform_1(x86_fp80* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi x86_fp80* [%tmp1, %for.body], [%a, %entry]
  %tmp0 = sitofp i32 1 to x86_fp80
  store x86_fp80 %tmp0, x86_fp80* %p, align 16
  %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %p, i32 1
  %i.next = add i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: pointer_iv_mixed
;
; Check multiple pointer induction variables where only one is recognized as
; uniform and remains uniform after vectorization. The other pointer induction
; variable is not recognized as uniform and is not uniform after vectorization
; because it is stored to memory.
;
; CHECK-NOT: LV: Found uniform instruction: %p = phi i32* [ %tmp3, %for.body ], [ %a, %entry ]
; CHECK: LV: Found uniform instruction: %q = phi i32** [ %tmp4, %for.body ], [ %b, %entry ]
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %next.gep = getelementptr i32, i32* %a, i64 %index
; CHECK: %[[I1:.+]] = or i64 %index, 1
; CHECK: %next.gep10 = getelementptr i32, i32* %a, i64 %[[I1]]
; CHECK: %[[I2:.+]] = or i64 %index, 2
; CHECK: %next.gep11 = getelementptr i32, i32* %a, i64 %[[I2]]
; CHECK: %[[I3:.+]] = or i64 %index, 3
; CHECK: %next.gep12 = getelementptr i32, i32* %a, i64 %[[I3]]
; CHECK: %[[V0:.+]] = insertelement <4 x i32*> undef, i32* %next.gep, i32 0
; CHECK: %[[V1:.+]] = insertelement <4 x i32*> %[[V0]], i32* %next.gep10, i32 1
; CHECK: %[[V2:.+]] = insertelement <4 x i32*> %[[V1]], i32* %next.gep11, i32 2
; CHECK: %[[V3:.+]] = insertelement <4 x i32*> %[[V2]], i32* %next.gep12, i32 3
; CHECK-NOT: getelementptr
; CHECK: %next.gep13 = getelementptr i32*, i32** %b, i64 %index
; CHECK-NOT: getelementptr
; CHECK: %[[B0:.+]] = bitcast i32** %next.gep13 to <4 x i32*>*
; CHECK: store <4 x i32*> %[[V3]], <4 x i32*>* %[[B0]], align 8
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define i32 @pointer_iv_mixed(i32* %a, i32** %b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p = phi i32* [ %tmp3, %for.body ], [ %a, %entry ]
  %q = phi i32** [ %tmp4, %for.body ], [ %b, %entry ]
  %tmp0 = phi i32 [ %tmp2, %for.body ], [ 0, %entry ]
  %tmp1 = load i32, i32* %p, align 8
  %tmp2 = add i32 %tmp1, %tmp0
  store i32* %p, i32** %q, align 8
  %tmp3 = getelementptr inbounds i32, i32* %p, i32 1
  %tmp4 = getelementptr inbounds i32*, i32** %q, i32 1
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp5 = phi i32 [ %tmp2, %for.body ]
  ret i32 %tmp5
}