1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s 3; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s 4 5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 6 7define void @bottom_tested(i16* %p, i32 %n) { 8; CHECK-LABEL: @bottom_tested( 9; CHECK-NEXT: entry: 10; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 11; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 12; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 13; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 14; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 15; CHECK: vector.ph: 16; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 17; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] 18; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 19; CHECK: vector.body: 20; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 21; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 22; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 23; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP3]] 24; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 25; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>* 26; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP6]], align 4 27; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 28; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 29; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] 30; CHECK: middle.block: 31; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] 32; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 33; CHECK: scalar.ph: 34; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = 
phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 35; CHECK-NEXT: br label [[FOR_COND:%.*]] 36; CHECK: for.cond: 37; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 38; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 39; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 40; CHECK-NEXT: store i16 0, i16* [[B]], align 4 41; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 42; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 43; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] 44; CHECK: if.end: 45; CHECK-NEXT: ret void 46; 47; TAILFOLD-LABEL: @bottom_tested( 48; TAILFOLD-NEXT: entry: 49; TAILFOLD-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 50; TAILFOLD-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 51; TAILFOLD-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 52; TAILFOLD-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 53; TAILFOLD: vector.ph: 54; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1 55; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 56; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] 57; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 58; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 59; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 60; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] 61; TAILFOLD: vector.body: 62; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 63; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 64; TAILFOLD-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 65; TAILFOLD-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 1 66; TAILFOLD-NEXT: 
[[TMP4:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 67; TAILFOLD-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> 68; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 69; TAILFOLD-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 70; TAILFOLD: pred.store.if: 71; TAILFOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 72; TAILFOLD-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 73; TAILFOLD-NEXT: store i16 0, i16* [[TMP8]], align 4 74; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] 75; TAILFOLD: pred.store.continue: 76; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 77; TAILFOLD-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 78; TAILFOLD: pred.store.if1: 79; TAILFOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 80; TAILFOLD-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP10]] 81; TAILFOLD-NEXT: store i16 0, i16* [[TMP11]], align 4 82; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] 83; TAILFOLD: pred.store.continue2: 84; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 85; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 86; TAILFOLD-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 87; TAILFOLD-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] 88; TAILFOLD: middle.block: 89; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] 90; TAILFOLD: scalar.ph: 91; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 92; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 93; TAILFOLD: for.cond: 94; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 95; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 96; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds 
i16, i16* [[P]], i64 [[IPROM]] 97; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 98; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 99; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 100; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] 101; TAILFOLD: if.end: 102; TAILFOLD-NEXT: ret void 103; 104entry: 105 br label %for.cond 106 107for.cond: 108 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 109 %iprom = sext i32 %i to i64 110 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 111 store i16 0, i16* %b, align 4 112 %inc = add nsw i32 %i, 1 113 %cmp = icmp slt i32 %i, %n 114 br i1 %cmp, label %for.cond, label %if.end 115 116if.end: 117 ret void 118} 119 120define void @early_exit(i16* %p, i32 %n) { 121; CHECK-LABEL: @early_exit( 122; CHECK-NEXT: entry: 123; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 124; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 125; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 126; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP1]], 2 127; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 128; CHECK: vector.ph: 129; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 130; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 131; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 2, i32 [[N_MOD_VF]] 132; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[TMP3]] 133; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 134; CHECK: vector.body: 135; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 136; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 137; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 138; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 139; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP4]] to i64 140; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] 141; 
CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP7]], i32 0 142; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <2 x i16>* 143; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP9]], align 4 144; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 145; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 146; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 147; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] 148; CHECK: middle.block: 149; CHECK-NEXT: br label [[SCALAR_PH]] 150; CHECK: scalar.ph: 151; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 152; CHECK-NEXT: br label [[FOR_COND:%.*]] 153; CHECK: for.cond: 154; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 155; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 156; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 157; CHECK: for.body: 158; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 159; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 160; CHECK-NEXT: store i16 0, i16* [[B]], align 4 161; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 162; CHECK-NEXT: br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]] 163; CHECK: if.end: 164; CHECK-NEXT: ret void 165; 166; TAILFOLD-LABEL: @early_exit( 167; TAILFOLD-NEXT: entry: 168; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 169; TAILFOLD: for.cond: 170; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 171; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 172; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 173; TAILFOLD: for.body: 174; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 175; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 176; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 
4 177; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 178; TAILFOLD-NEXT: br label [[FOR_COND]] 179; TAILFOLD: if.end: 180; TAILFOLD-NEXT: ret void 181; 182entry: 183 br label %for.cond 184 185for.cond: 186 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 187 %cmp = icmp slt i32 %i, %n 188 br i1 %cmp, label %for.body, label %if.end 189 190for.body: 191 %iprom = sext i32 %i to i64 192 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 193 store i16 0, i16* %b, align 4 194 %inc = add nsw i32 %i, 1 195 br label %for.cond 196 197if.end: 198 ret void 199} 200 201; Same as early_exit, but marked optsize to prevent the use of a scalar 202; epilogue. The loop cannot be vectorized under either RUN configuration. 203define void @optsize(i16* %p, i32 %n) optsize { 204; CHECK-LABEL: @optsize( 205; CHECK-NEXT: entry: 206; CHECK-NEXT: br label [[FOR_COND:%.*]] 207; CHECK: for.cond: 208; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 209; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 210; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 211; CHECK: for.body: 212; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 213; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 214; CHECK-NEXT: store i16 0, i16* [[B]], align 4 215; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 216; CHECK-NEXT: br label [[FOR_COND]] 217; CHECK: if.end: 218; CHECK-NEXT: ret void 219; 220; TAILFOLD-LABEL: @optsize( 221; TAILFOLD-NEXT: entry: 222; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 223; TAILFOLD: for.cond: 224; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 225; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 226; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 227; TAILFOLD: for.body: 228; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 229; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 230; TAILFOLD-NEXT: store
i16 0, i16* [[B]], align 4 231; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 232; TAILFOLD-NEXT: br label [[FOR_COND]] 233; TAILFOLD: if.end: 234; TAILFOLD-NEXT: ret void 235; 236entry: 237 br label %for.cond 238 239for.cond: 240 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 241 %cmp = icmp slt i32 %i, %n 242 br i1 %cmp, label %for.body, label %if.end 243 244for.body: 245 %iprom = sext i32 %i to i64 246 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 247 store i16 0, i16* %b, align 4 248 %inc = add nsw i32 %i, 1 249 br label %for.cond 250 251if.end: 252 ret void 253} 254 255 256; multiple exit - no values inside the loop used outside 257define void @multiple_unique_exit(i16* %p, i32 %n) { 258; CHECK-LABEL: @multiple_unique_exit( 259; CHECK-NEXT: entry: 260; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 261; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 262; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 263; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 264; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 265; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 266; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 267; CHECK: vector.ph: 268; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 269; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 270; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] 271; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] 272; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 273; CHECK: vector.body: 274; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 275; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 276; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 277; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 278; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 279; 
CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 280; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 281; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* 282; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 283; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 284; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 285; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 286; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] 287; CHECK: middle.block: 288; CHECK-NEXT: br label [[SCALAR_PH]] 289; CHECK: scalar.ph: 290; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 291; CHECK-NEXT: br label [[FOR_COND:%.*]] 292; CHECK: for.cond: 293; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 294; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 295; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 296; CHECK: for.body: 297; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 298; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 299; CHECK-NEXT: store i16 0, i16* [[B]], align 4 300; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 301; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 302; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]] 303; CHECK: if.end: 304; CHECK-NEXT: ret void 305; 306; TAILFOLD-LABEL: @multiple_unique_exit( 307; TAILFOLD-NEXT: entry: 308; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 309; TAILFOLD: for.cond: 310; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 311; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 312; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 313; TAILFOLD: for.body: 314; 
TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 315; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 316; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 317; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 318; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 319; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 320; TAILFOLD: if.end: 321; TAILFOLD-NEXT: ret void 322; 323entry: 324 br label %for.cond 325 326for.cond: 327 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 328 %cmp = icmp slt i32 %i, %n 329 br i1 %cmp, label %for.body, label %if.end 330 331for.body: 332 %iprom = sext i32 %i to i64 333 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 334 store i16 0, i16* %b, align 4 335 %inc = add nsw i32 %i, 1 336 %cmp2 = icmp slt i32 %i, 2096 337 br i1 %cmp2, label %for.cond, label %if.end 338 339if.end: 340 ret void 341} 342 343; multiple exit - with an lcssa phi 344define i32 @multiple_unique_exit2(i16* %p, i32 %n) { 345; CHECK-LABEL: @multiple_unique_exit2( 346; CHECK-NEXT: entry: 347; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 348; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 349; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 350; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 351; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 352; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 353; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 354; CHECK: vector.ph: 355; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 356; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 357; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] 358; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] 359; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 360; CHECK: vector.body: 361; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 
362; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 363; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 364; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 365; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 366; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 367; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 368; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* 369; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 370; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 371; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 372; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 373; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] 374; CHECK: middle.block: 375; CHECK-NEXT: br label [[SCALAR_PH]] 376; CHECK: scalar.ph: 377; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 378; CHECK-NEXT: br label [[FOR_COND:%.*]] 379; CHECK: for.cond: 380; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 381; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 382; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 383; CHECK: for.body: 384; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 385; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 386; CHECK-NEXT: store i16 0, i16* [[B]], align 4 387; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 388; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 389; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]] 390; CHECK: if.end: 391; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 392; CHECK-NEXT: ret i32 [[I_LCSSA]] 393; 394; 
TAILFOLD-LABEL: @multiple_unique_exit2( 395; TAILFOLD-NEXT: entry: 396; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 397; TAILFOLD: for.cond: 398; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 399; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 400; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 401; TAILFOLD: for.body: 402; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 403; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 404; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 405; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 406; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 407; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 408; TAILFOLD: if.end: 409; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 410; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 411; 412entry: 413 br label %for.cond 414 415for.cond: 416 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 417 %cmp = icmp slt i32 %i, %n 418 br i1 %cmp, label %for.body, label %if.end 419 420for.body: 421 %iprom = sext i32 %i to i64 422 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 423 store i16 0, i16* %b, align 4 424 %inc = add nsw i32 %i, 1 425 %cmp2 = icmp slt i32 %i, 2096 426 br i1 %cmp2, label %for.cond, label %if.end 427 428if.end: 429 ret i32 %i 430} 431 432; multiple exit with a non-LCSSA phi in the exit block 433define i32 @multiple_unique_exit3(i16* %p, i32 %n) { 434; CHECK-LABEL: @multiple_unique_exit3( 435; CHECK-NEXT: entry: 436; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 437; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 438; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 439; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 440; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 441; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 442; CHECK-NEXT: br i1
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 443; CHECK: vector.ph: 444; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 445; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 446; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] 447; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] 448; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 449; CHECK: vector.body: 450; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 451; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 452; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 453; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 454; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 455; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 456; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 457; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* 458; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 459; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 460; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 461; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 462; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] 463; CHECK: middle.block: 464; CHECK-NEXT: br label [[SCALAR_PH]] 465; CHECK: scalar.ph: 466; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 467; CHECK-NEXT: br label [[FOR_COND:%.*]] 468; CHECK: for.cond: 469; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 470; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 471; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 472; CHECK: for.body: 473; CHECK-NEXT: [[IPROM:%.*]] = sext 
i32 [[I]] to i64 474; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 475; CHECK-NEXT: store i16 0, i16* [[B]], align 4 476; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 477; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 478; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]] 479; CHECK: if.end: 480; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 481; CHECK-NEXT: ret i32 [[EXIT]] 482; 483; TAILFOLD-LABEL: @multiple_unique_exit3( 484; TAILFOLD-NEXT: entry: 485; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 486; TAILFOLD: for.cond: 487; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 488; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 489; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 490; TAILFOLD: for.body: 491; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 492; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 493; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 494; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 495; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 496; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 497; TAILFOLD: if.end: 498; TAILFOLD-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 499; TAILFOLD-NEXT: ret i32 [[EXIT]] 500; 501entry: 502 br label %for.cond 503 504for.cond: 505 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 506 %cmp = icmp slt i32 %i, %n 507 br i1 %cmp, label %for.body, label %if.end 508 509for.body: 510 %iprom = sext i32 %i to i64 511 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 512 store i16 0, i16* %b, align 4 513 %inc = add nsw i32 %i, 1 514 %cmp2 = icmp slt i32 %i, 2096 515 br i1 %cmp2, label %for.cond, label %if.end 516 517if.end: 518 %exit = phi i32 [0, %for.cond], [1, %for.body] 519 ret i32 %exit 520} 521 522; multiple exits w/distinct target blocks 
523define i32 @multiple_exit_blocks(i16* %p, i32 %n) { 524; CHECK-LABEL: @multiple_exit_blocks( 525; CHECK-NEXT: entry: 526; CHECK-NEXT: br label [[FOR_COND:%.*]] 527; CHECK: for.cond: 528; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 529; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 530; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 531; CHECK: for.body: 532; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 533; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 534; CHECK-NEXT: store i16 0, i16* [[B]], align 4 535; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 536; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 537; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 538; CHECK: if.end: 539; CHECK-NEXT: ret i32 0 540; CHECK: if.end2: 541; CHECK-NEXT: ret i32 1 542; 543; TAILFOLD-LABEL: @multiple_exit_blocks( 544; TAILFOLD-NEXT: entry: 545; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 546; TAILFOLD: for.cond: 547; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 548; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 549; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 550; TAILFOLD: for.body: 551; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 552; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 553; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 554; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 555; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 556; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 557; TAILFOLD: if.end: 558; TAILFOLD-NEXT: ret i32 0 559; TAILFOLD: if.end2: 560; TAILFOLD-NEXT: ret i32 1 561; 562entry: 563 br label %for.cond 564 565for.cond: 566 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 567 %cmp = icmp slt i32 %i, %n 568 br i1 %cmp, label %for.body, label %if.end 569 
570for.body: 571 %iprom = sext i32 %i to i64 572 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 573 store i16 0, i16* %b, align 4 574 %inc = add nsw i32 %i, 1 575 %cmp2 = icmp slt i32 %i, 2096 576 br i1 %cmp2, label %for.cond, label %if.end2 577 578if.end: 579 ret i32 0 580 581if.end2: 582 ret i32 1 583} 584 585; LCSSA, common value each exit 586define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { 587; CHECK-LABEL: @multiple_exit_blocks2( 588; CHECK-NEXT: entry: 589; CHECK-NEXT: br label [[FOR_COND:%.*]] 590; CHECK: for.cond: 591; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 592; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 593; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 594; CHECK: for.body: 595; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 596; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 597; CHECK-NEXT: store i16 0, i16* [[B]], align 4 598; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 599; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 600; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 601; CHECK: if.end: 602; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 603; CHECK-NEXT: ret i32 [[I_LCSSA]] 604; CHECK: if.end2: 605; CHECK-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 606; CHECK-NEXT: ret i32 [[I_LCSSA1]] 607; 608; TAILFOLD-LABEL: @multiple_exit_blocks2( 609; TAILFOLD-NEXT: entry: 610; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 611; TAILFOLD: for.cond: 612; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 613; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 614; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 615; TAILFOLD: for.body: 616; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 617; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 618; TAILFOLD-NEXT: store i16 0, 
i16* [[B]], align 4 619; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 620; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 621; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 622; TAILFOLD: if.end: 623; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 624; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 625; TAILFOLD: if.end2: 626; TAILFOLD-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 627; TAILFOLD-NEXT: ret i32 [[I_LCSSA1]] 628; 629entry: 630 br label %for.cond 631 632for.cond: 633 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 634 %cmp = icmp slt i32 %i, %n 635 br i1 %cmp, label %for.body, label %if.end 636 637for.body: 638 %iprom = sext i32 %i to i64 639 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 640 store i16 0, i16* %b, align 4 641 %inc = add nsw i32 %i, 1 642 %cmp2 = icmp slt i32 %i, 2096 643 br i1 %cmp2, label %for.cond, label %if.end2 644 645if.end: 646 ret i32 %i 647 648if.end2: 649 ret i32 %i 650} 651 652; LCSSA, distinct value each exit 653define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { 654; CHECK-LABEL: @multiple_exit_blocks3( 655; CHECK-NEXT: entry: 656; CHECK-NEXT: br label [[FOR_COND:%.*]] 657; CHECK: for.cond: 658; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 659; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 660; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 661; CHECK: for.body: 662; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 663; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 664; CHECK-NEXT: store i16 0, i16* [[B]], align 4 665; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 666; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 667; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 668; CHECK: if.end: 669; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 670; CHECK-NEXT: ret i32 [[I_LCSSA]] 671; CHECK: if.end2: 672; CHECK-NEXT: 
[[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 673; CHECK-NEXT: ret i32 [[INC_LCSSA]] 674; 675; TAILFOLD-LABEL: @multiple_exit_blocks3( 676; TAILFOLD-NEXT: entry: 677; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 678; TAILFOLD: for.cond: 679; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 680; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 681; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 682; TAILFOLD: for.body: 683; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 684; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 685; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 686; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 687; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 688; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 689; TAILFOLD: if.end: 690; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 691; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 692; TAILFOLD: if.end2: 693; TAILFOLD-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 694; TAILFOLD-NEXT: ret i32 [[INC_LCSSA]] 695; 696entry: 697 br label %for.cond 698 699for.cond: 700 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 701 %cmp = icmp slt i32 %i, %n 702 br i1 %cmp, label %for.body, label %if.end 703 704for.body: 705 %iprom = sext i32 %i to i64 706 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 707 store i16 0, i16* %b, align 4 708 %inc = add nsw i32 %i, 1 709 %cmp2 = icmp slt i32 %i, 2096 710 br i1 %cmp2, label %for.cond, label %if.end2 711 712if.end: 713 ret i32 %i 714 715if.end2: 716 ret i32 %inc 717} 718 719; Unique exit block, but reached through a switch: two edges between the 720; same pair of blocks are an often-missed edge case. 721define i32 @multiple_exit_switch(i16* %p, i32 %n) { 722; CHECK-LABEL: @multiple_exit_switch( 723; CHECK-NEXT: entry: 724; CHECK-NEXT: br label [[FOR_COND:%.*]] 725; CHECK: for.cond: 726; CHECK-NEXT:
[[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 727; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 728; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 729; CHECK-NEXT: store i16 0, i16* [[B]], align 4 730; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 731; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 732; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 733; CHECK-NEXT: i32 2097, label [[IF_END]] 734; CHECK-NEXT: ] 735; CHECK: if.end: 736; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 737; CHECK-NEXT: ret i32 [[I_LCSSA]] 738; 739; TAILFOLD-LABEL: @multiple_exit_switch( 740; TAILFOLD-NEXT: entry: 741; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 742; TAILFOLD: for.cond: 743; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 744; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 745; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 746; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 747; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 748; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 749; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 750; TAILFOLD-NEXT: i32 2097, label [[IF_END]] 751; TAILFOLD-NEXT: ] 752; TAILFOLD: if.end: 753; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 754; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 755; 756entry: 757 br label %for.cond 758 759for.cond: 760 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 761 %iprom = sext i32 %i to i64 762 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 763 store i16 0, i16* %b, align 4 764 %inc = add nsw i32 %i, 1 765 switch i32 %i, label %for.cond [ 766 i32 2096, label %if.end 767 i32 2097, label %if.end 768 ] 769 770if.end: 771 ret i32 %i 772} 773 774; Multiple-exit case, but using a switch: several exiting edges leaving 775; a single block are a commonly missed edge case. 776define i32
@multiple_exit_switch2(i16* %p, i32 %n) { 777; CHECK-LABEL: @multiple_exit_switch2( 778; CHECK-NEXT: entry: 779; CHECK-NEXT: br label [[FOR_COND:%.*]] 780; CHECK: for.cond: 781; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 782; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 783; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 784; CHECK-NEXT: store i16 0, i16* [[B]], align 4 785; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 786; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 787; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 788; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] 789; CHECK-NEXT: ] 790; CHECK: if.end: 791; CHECK-NEXT: ret i32 0 792; CHECK: if.end2: 793; CHECK-NEXT: ret i32 1 794; 795; TAILFOLD-LABEL: @multiple_exit_switch2( 796; TAILFOLD-NEXT: entry: 797; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 798; TAILFOLD: for.cond: 799; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 800; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 801; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 802; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 803; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 804; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 805; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 806; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] 807; TAILFOLD-NEXT: ] 808; TAILFOLD: if.end: 809; TAILFOLD-NEXT: ret i32 0 810; TAILFOLD: if.end2: 811; TAILFOLD-NEXT: ret i32 1 812; 813entry: 814 br label %for.cond 815 816for.cond: 817 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 818 %iprom = sext i32 %i to i64 819 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 820 store i16 0, i16* %b, align 4 821 %inc = add nsw i32 %i, 1 822 switch i32 %i, label %for.cond [ 823 i32 2096, label %if.end 824 i32 2097, label %if.end2 825 ] 826 827if.end: 828 ret i32 0 829 830if.end2: 831 ret i32 1 832} 833 834define i32 
@multiple_latch1(i16* %p) { 835; CHECK-LABEL: @multiple_latch1( 836; CHECK-NEXT: entry: 837; CHECK-NEXT: br label [[FOR_BODY:%.*]] 838; CHECK: for.body: 839; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 840; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 841; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 842; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 843; CHECK: for.second: 844; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 845; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 846; CHECK-NEXT: store i16 0, i16* [[B]], align 4 847; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 848; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 849; CHECK: for.body.backedge: 850; CHECK-NEXT: br label [[FOR_BODY]] 851; CHECK: for.end: 852; CHECK-NEXT: ret i32 0 853; 854; TAILFOLD-LABEL: @multiple_latch1( 855; TAILFOLD-NEXT: entry: 856; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 857; TAILFOLD: for.body: 858; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 859; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 860; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 861; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 862; TAILFOLD: for.second: 863; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 864; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 865; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 866; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 867; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 868; TAILFOLD: for.body.backedge: 869; TAILFOLD-NEXT: br label [[FOR_BODY]] 870; TAILFOLD: for.end: 871; TAILFOLD-NEXT: ret i32 0 872; 873entry: 874 br label %for.body 875 876for.body: 877 %i.02 = phi i32 [ 0, %entry ], [ %inc, 
%for.body.backedge] 878 %inc = add nsw i32 %i.02, 1 879 %cmp = icmp slt i32 %inc, 16 880 br i1 %cmp, label %for.body.backedge, label %for.second 881 882for.second: 883 %iprom = sext i32 %i.02 to i64 884 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 885 store i16 0, i16* %b, align 4 886 %cmps = icmp sgt i32 %inc, 16 887 br i1 %cmps, label %for.body.backedge, label %for.end 888 889for.body.backedge: 890 br label %for.body 891 892for.end: 893 ret i32 0 894} 895 896 897; two back branches - loop simplify will convert this to the same form 898; as previous before vectorizer sees it, but show that. 899define i32 @multiple_latch2(i16* %p) { 900; CHECK-LABEL: @multiple_latch2( 901; CHECK-NEXT: entry: 902; CHECK-NEXT: br label [[FOR_BODY:%.*]] 903; CHECK: for.body: 904; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 905; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 906; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 907; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 908; CHECK: for.body.backedge: 909; CHECK-NEXT: br label [[FOR_BODY]] 910; CHECK: for.second: 911; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 912; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 913; CHECK-NEXT: store i16 0, i16* [[B]], align 4 914; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 915; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 916; CHECK: for.end: 917; CHECK-NEXT: ret i32 0 918; 919; TAILFOLD-LABEL: @multiple_latch2( 920; TAILFOLD-NEXT: entry: 921; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 922; TAILFOLD: for.body: 923; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 924; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 925; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 926; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label 
[[FOR_SECOND:%.*]] 927; TAILFOLD: for.body.backedge: 928; TAILFOLD-NEXT: br label [[FOR_BODY]] 929; TAILFOLD: for.second: 930; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 931; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 932; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 933; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 934; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 935; TAILFOLD: for.end: 936; TAILFOLD-NEXT: ret i32 0 937; 938entry: 939 br label %for.body 940 941for.body: 942 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second] 943 %inc = add nsw i32 %i.02, 1 944 %cmp = icmp slt i32 %inc, 16 945 br i1 %cmp, label %for.body, label %for.second 946 947for.second: 948 %iprom = sext i32 %i.02 to i64 949 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 950 store i16 0, i16* %b, align 4 951 %cmps = icmp sgt i32 %inc, 16 952 br i1 %cmps, label %for.body, label %for.end 953 954for.end: 955 ret i32 0 956} 957 958 959; Check interaction between block predication and early exits. We need the 960; condition on the early exit to remain dead (i.e. not be used when forming 961; the predicate mask). 
962define void @scalar_predication(float* %addr) { 963; CHECK-LABEL: @scalar_predication( 964; CHECK-NEXT: entry: 965; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 966; CHECK: vector.ph: 967; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 968; CHECK: vector.body: 969; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 970; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 971; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 972; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] 973; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 974; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>* 975; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 976; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 977; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true> 978; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 979; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 980; CHECK: pred.store.if: 981; CHECK-NEXT: store float 1.000000e+01, float* [[TMP1]], align 4 982; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] 983; CHECK: pred.store.continue: 984; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 985; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 986; CHECK: pred.store.if1: 987; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 988; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]] 989; CHECK-NEXT: store float 1.000000e+01, float* [[TMP9]], align 4 990; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] 991; CHECK: pred.store.continue2: 992; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 993; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x 
i64> [[VEC_IND]], <i64 2, i64 2> 994; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 995; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] 996; CHECK: middle.block: 997; CHECK-NEXT: br label [[SCALAR_PH]] 998; CHECK: scalar.ph: 999; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1000; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1001; CHECK: loop.header: 1002; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1003; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]] 1004; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1005; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1006; CHECK: loop.body: 1007; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[GEP]], align 4 1008; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00 1009; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1010; CHECK: then: 1011; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1012; CHECK-NEXT: br label [[LOOP_LATCH]] 1013; CHECK: loop.latch: 1014; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1015; CHECK-NEXT: br label [[LOOP_HEADER]], [[LOOP13:!llvm.loop !.*]] 1016; CHECK: exit: 1017; CHECK-NEXT: ret void 1018; 1019; TAILFOLD-LABEL: @scalar_predication( 1020; TAILFOLD-NEXT: entry: 1021; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1022; TAILFOLD: loop.header: 1023; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1024; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]] 1025; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1026; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1027; TAILFOLD: loop.body: 1028; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4 1029; 
TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 1030; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1031; TAILFOLD: then: 1032; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1033; TAILFOLD-NEXT: br label [[LOOP_LATCH]] 1034; TAILFOLD: loop.latch: 1035; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1036; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1037; TAILFOLD: exit: 1038; TAILFOLD-NEXT: ret void 1039; 1040entry: 1041 br label %loop.header 1042 1043loop.header: 1044 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1045 %gep = getelementptr float, float* %addr, i64 %iv 1046 %exitcond.not = icmp eq i64 %iv, 200 1047 br i1 %exitcond.not, label %exit, label %loop.body 1048 1049loop.body: 1050 %0 = load float, float* %gep, align 4 1051 %pred = fcmp oeq float %0, 0.0 1052 br i1 %pred, label %loop.latch, label %then 1053 1054then: 1055 store float 10.0, float* %gep, align 4 1056 br label %loop.latch 1057 1058loop.latch: 1059 %iv.next = add nuw nsw i64 %iv, 1 1060 br label %loop.header 1061 1062exit: 1063 ret void 1064} 1065 1066define i32 @me_reduction(i32* %addr) { 1067; CHECK-LABEL: @me_reduction( 1068; CHECK-NEXT: entry: 1069; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1070; CHECK: vector.ph: 1071; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1072; CHECK: vector.body: 1073; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1074; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1075; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1076; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1077; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 1078; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] 1079; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], 
i32 0 1080; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 1081; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 1082; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 1083; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 1084; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1085; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1086; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] 1087; CHECK: middle.block: 1088; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef> 1089; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP5]], [[RDX_SHUF]] 1090; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[BIN_RDX]], i32 0 1091; CHECK-NEXT: br label [[SCALAR_PH]] 1092; CHECK: scalar.ph: 1093; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1094; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1095; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1096; CHECK: loop.header: 1097; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1098; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1099; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]] 1100; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1101; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1102; CHECK: loop.latch: 1103; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 1104; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] 1105; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1106; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1107; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], 
[[LOOP15:!llvm.loop !.*]] 1108; CHECK: exit: 1109; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1110; CHECK-NEXT: ret i32 [[LCSSA]] 1111; 1112; TAILFOLD-LABEL: @me_reduction( 1113; TAILFOLD-NEXT: entry: 1114; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1115; TAILFOLD: loop.header: 1116; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1117; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1118; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1119; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1120; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1121; TAILFOLD: loop.latch: 1122; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1123; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1124; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1125; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1126; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] 1127; TAILFOLD: exit: 1128; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1129; TAILFOLD-NEXT: ret i32 [[LCSSA]] 1130; 1131entry: 1132 br label %loop.header 1133 1134loop.header: 1135 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1136 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1137 %gep = getelementptr i32, i32* %addr, i64 %iv 1138 %exitcond.not = icmp eq i64 %iv, 200 1139 br i1 %exitcond.not, label %exit, label %loop.latch 1140 1141loop.latch: 1142 %0 = load i32, i32* %gep, align 4 1143 %accum.next = add i32 %accum, %0 1144 %iv.next = add nuw nsw i64 %iv, 1 1145 %exitcond2.not = icmp eq i64 %iv, 400 1146 br i1 %exitcond2.not, label %exit, label %loop.header 1147 1148exit: 1149 %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch] 1150 ret i32 %lcssa 
1151} 1152 1153; TODO: The current definition of reduction is too strict, we can vectorize 1154; this. There's an analogous single exit case where we extract the N-1 1155; value of the reduction that we can also handle. If we fix the latter, the 1156; multiple exit case probably falls out. 1157define i32 @me_reduction2(i32* %addr) { 1158; CHECK-LABEL: @me_reduction2( 1159; CHECK-NEXT: entry: 1160; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1161; CHECK: loop.header: 1162; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1163; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1164; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1165; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1166; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1167; CHECK: loop.latch: 1168; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1169; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1170; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1171; CHECK-NEXT: br label [[LOOP_HEADER]] 1172; CHECK: exit: 1173; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1174; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] 1175; 1176; TAILFOLD-LABEL: @me_reduction2( 1177; TAILFOLD-NEXT: entry: 1178; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1179; TAILFOLD: loop.header: 1180; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1181; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1182; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1183; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1184; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1185; TAILFOLD: loop.latch: 1186; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1187; 
TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1188; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1189; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1190; TAILFOLD: exit: 1191; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1192; TAILFOLD-NEXT: ret i32 [[ACCUM_LCSSA]] 1193; 1194entry: 1195 br label %loop.header 1196 1197loop.header: 1198 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1199 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1200 %gep = getelementptr i32, i32* %addr, i64 %iv 1201 %exitcond.not = icmp eq i64 %iv, 200 1202 br i1 %exitcond.not, label %exit, label %loop.latch 1203 1204loop.latch: 1205 %0 = load i32, i32* %gep, align 4 1206 %accum.next = add i32 %accum, %0 1207 %iv.next = add nuw nsw i64 %iv, 1 1208 br label %loop.header 1209 1210exit: 1211 ret i32 %accum 1212} 1213 1214