1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s 3; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s 4 5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 6 7define void @bottom_tested(i16* %p, i32 %n) { 8; CHECK-LABEL: @bottom_tested( 9; CHECK-NEXT: entry: 10; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 11; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 12; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 13; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 14; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 15; CHECK: vector.ph: 16; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 17; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] 18; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 19; CHECK: vector.body: 20; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 21; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 22; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP2]] to i64 23; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP3]] 24; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 25; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>* 26; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP6]], align 4 27; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 28; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 29; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] 30; CHECK: middle.block: 31; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] 32; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 33; CHECK: scalar.ph: 34; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = 
phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 35; CHECK-NEXT: br label [[FOR_COND:%.*]] 36; CHECK: for.cond: 37; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 38; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 39; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 40; CHECK-NEXT: store i16 0, i16* [[B]], align 4 41; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 42; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 43; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] 44; CHECK: if.end: 45; CHECK-NEXT: ret void 46; 47; TAILFOLD-LABEL: @bottom_tested( 48; TAILFOLD-NEXT: entry: 49; TAILFOLD-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 50; TAILFOLD-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 51; TAILFOLD-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 52; TAILFOLD-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 53; TAILFOLD: vector.ph: 54; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1 55; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 56; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] 57; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1 58; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 59; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 60; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] 61; TAILFOLD: vector.body: 62; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 63; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 64; TAILFOLD-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 0 65; TAILFOLD-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 1 66; TAILFOLD-NEXT: 
[[TMP4:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 67; TAILFOLD-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> 68; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 69; TAILFOLD-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 70; TAILFOLD: pred.store.if: 71; TAILFOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 72; TAILFOLD-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 73; TAILFOLD-NEXT: store i16 0, i16* [[TMP8]], align 4 74; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] 75; TAILFOLD: pred.store.continue: 76; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 77; TAILFOLD-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 78; TAILFOLD: pred.store.if1: 79; TAILFOLD-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 80; TAILFOLD-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP10]] 81; TAILFOLD-NEXT: store i16 0, i16* [[TMP11]], align 4 82; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] 83; TAILFOLD: pred.store.continue2: 84; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 85; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 86; TAILFOLD-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 87; TAILFOLD-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] 88; TAILFOLD: middle.block: 89; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] 90; TAILFOLD: scalar.ph: 91; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 92; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 93; TAILFOLD: for.cond: 94; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 95; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 96; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds 
i16, i16* [[P]], i64 [[IPROM]] 97; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 98; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 99; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 100; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] 101; TAILFOLD: if.end: 102; TAILFOLD-NEXT: ret void 103; 104entry: 105 br label %for.cond 106 107for.cond: 108 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 109 %iprom = sext i32 %i to i64 110 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 111 store i16 0, i16* %b, align 4 112 %inc = add nsw i32 %i, 1 113 %cmp = icmp slt i32 %i, %n 114 br i1 %cmp, label %for.cond, label %if.end 115 116if.end: 117 ret void 118} 119 120define void @early_exit(i16* %p, i32 %n) { 121; CHECK-LABEL: @early_exit( 122; CHECK-NEXT: entry: 123; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 124; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 125; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1 126; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP1]], 2 127; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 128; CHECK: vector.ph: 129; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 130; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 131; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 2, i32 [[N_MOD_VF]] 132; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[TMP3]] 133; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 134; CHECK: vector.body: 135; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 136; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 137; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 138; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 139; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP4]] to i64 140; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] 141; 
CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP7]], i32 0 142; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <2 x i16>* 143; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP9]], align 4 144; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 145; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 146; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 147; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] 148; CHECK: middle.block: 149; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] 150; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 151; CHECK: scalar.ph: 152; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 153; CHECK-NEXT: br label [[FOR_COND:%.*]] 154; CHECK: for.cond: 155; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 156; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 157; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] 158; CHECK: for.body: 159; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 160; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 161; CHECK-NEXT: store i16 0, i16* [[B]], align 4 162; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 163; CHECK-NEXT: br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]] 164; CHECK: if.end: 165; CHECK-NEXT: ret void 166; 167; TAILFOLD-LABEL: @early_exit( 168; TAILFOLD-NEXT: entry: 169; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 170; TAILFOLD: for.cond: 171; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 172; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 173; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 174; TAILFOLD: for.body: 175; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 176; TAILFOLD-NEXT: [[B:%.*]] = 
getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 177; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 178; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 179; TAILFOLD-NEXT: br label [[FOR_COND]] 180; TAILFOLD: if.end: 181; TAILFOLD-NEXT: ret void 182; 183entry: 184 br label %for.cond 185 186for.cond: 187 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 188 %cmp = icmp slt i32 %i, %n 189 br i1 %cmp, label %for.body, label %if.end 190 191for.body: 192 %iprom = sext i32 %i to i64 193 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 194 store i16 0, i16* %b, align 4 195 %inc = add nsw i32 %i, 1 196 br label %for.cond 197 198if.end: 199 ret void 200} 201 202; Same as early_exit, but with optsize to prevent the use of 203; a scalar epilogue. -- Can't vectorize this in either case. 204define void @optsize(i16* %p, i32 %n) optsize { 205; CHECK-LABEL: @optsize( 206; CHECK-NEXT: entry: 207; CHECK-NEXT: br label [[FOR_COND:%.*]] 208; CHECK: for.cond: 209; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 210; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 211; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 212; CHECK: for.body: 213; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 214; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 215; CHECK-NEXT: store i16 0, i16* [[B]], align 4 216; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 217; CHECK-NEXT: br label [[FOR_COND]] 218; CHECK: if.end: 219; CHECK-NEXT: ret void 220; 221; TAILFOLD-LABEL: @optsize( 222; TAILFOLD-NEXT: entry: 223; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 224; TAILFOLD: for.cond: 225; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 226; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 227; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 228; TAILFOLD: for.body: 229; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 230; 
TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 231; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 232; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 233; TAILFOLD-NEXT: br label [[FOR_COND]] 234; TAILFOLD: if.end: 235; TAILFOLD-NEXT: ret void 236; 237entry: 238 br label %for.cond 239 240for.cond: 241 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 242 %cmp = icmp slt i32 %i, %n 243 br i1 %cmp, label %for.body, label %if.end 244 245for.body: 246 %iprom = sext i32 %i to i64 247 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 248 store i16 0, i16* %b, align 4 249 %inc = add nsw i32 %i, 1 250 br label %for.cond 251 252if.end: 253 ret void 254} 255 256 257; multiple exit - no values inside the loop used outside 258define void @multiple_unique_exit(i16* %p, i32 %n) { 259; CHECK-LABEL: @multiple_unique_exit( 260; CHECK-NEXT: entry: 261; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 262; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 263; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 264; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 265; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 266; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 267; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 268; CHECK: vector.ph: 269; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 270; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 271; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] 272; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] 273; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 274; CHECK: vector.body: 275; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 276; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 277; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 278; 
CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 279; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 280; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 281; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 282; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* 283; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 284; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 285; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 286; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 287; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] 288; CHECK: middle.block: 289; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] 290; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 291; CHECK: scalar.ph: 292; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 293; CHECK-NEXT: br label [[FOR_COND:%.*]] 294; CHECK: for.cond: 295; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 296; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 297; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] 298; CHECK: for.body: 299; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 300; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 301; CHECK-NEXT: store i16 0, i16* [[B]], align 4 302; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 303; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 304; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]] 305; CHECK: if.end: 306; CHECK-NEXT: ret void 307; 308; TAILFOLD-LABEL: @multiple_unique_exit( 309; TAILFOLD-NEXT: entry: 310; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 311; TAILFOLD: for.cond: 312; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 
[[INC:%.*]], [[FOR_BODY:%.*]] ] 313; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 314; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 315; TAILFOLD: for.body: 316; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 317; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 318; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 319; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 320; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 321; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 322; TAILFOLD: if.end: 323; TAILFOLD-NEXT: ret void 324; 325entry: 326 br label %for.cond 327 328for.cond: 329 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 330 %cmp = icmp slt i32 %i, %n 331 br i1 %cmp, label %for.body, label %if.end 332 333for.body: 334 %iprom = sext i32 %i to i64 335 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 336 store i16 0, i16* %b, align 4 337 %inc = add nsw i32 %i, 1 338 %cmp2 = icmp slt i32 %i, 2096 339 br i1 %cmp2, label %for.cond, label %if.end 340 341if.end: 342 ret void 343} 344 345; multiple exit - with an lcssa phi 346define i32 @multiple_unique_exit2(i16* %p, i32 %n) { 347; CHECK-LABEL: @multiple_unique_exit2( 348; CHECK-NEXT: entry: 349; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 350; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 351; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 352; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 353; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 354; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 355; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 356; CHECK: vector.ph: 357; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 358; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 359; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] 360; CHECK-NEXT: [[N_VEC:%.*]] = 
sub i32 [[TMP2]], [[TMP4]] 361; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 362; CHECK: vector.body: 363; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 364; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 365; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 366; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 367; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 368; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 369; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 370; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* 371; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 372; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 373; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 374; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 375; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] 376; CHECK: middle.block: 377; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] 378; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1 379; CHECK-NEXT: [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1 380; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 381; CHECK: scalar.ph: 382; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 383; CHECK-NEXT: br label [[FOR_COND:%.*]] 384; CHECK: for.cond: 385; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 386; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 387; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] 388; CHECK: for.body: 389; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 390; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 391; 
CHECK-NEXT: store i16 0, i16* [[B]], align 4 392; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 393; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 394; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]] 395; CHECK: if.end: 396; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ], [ [[IND_ESCAPE1]], [[MIDDLE_BLOCK]] ] 397; CHECK-NEXT: ret i32 [[I_LCSSA]] 398; 399; TAILFOLD-LABEL: @multiple_unique_exit2( 400; TAILFOLD-NEXT: entry: 401; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 402; TAILFOLD: for.cond: 403; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 404; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 405; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 406; TAILFOLD: for.body: 407; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 408; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 409; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 410; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 411; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 412; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 413; TAILFOLD: if.end: 414; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 415; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 416; 417entry: 418 br label %for.cond 419 420for.cond: 421 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 422 %cmp = icmp slt i32 %i, %n 423 br i1 %cmp, label %for.body, label %if.end 424 425for.body: 426 %iprom = sext i32 %i to i64 427 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 428 store i16 0, i16* %b, align 4 429 %inc = add nsw i32 %i, 1 430 %cmp2 = icmp slt i32 %i, 2096 431 br i1 %cmp2, label %for.cond, label %if.end 432 433if.end: 434 ret i32 %i 435} 436 437; multiple exit w/a non lcssa phi 438define i32 @multiple_unique_exit3(i16* %p, i32 %n) { 439; CHECK-LABEL: @multiple_unique_exit3( 
440; CHECK-NEXT: entry: 441; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0 442; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0 443; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096 444; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096 445; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1 446; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2 447; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 448; CHECK: vector.ph: 449; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2 450; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 451; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]] 452; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]] 453; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 454; CHECK: vector.body: 455; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 456; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 457; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 458; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 459; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[TMP5]] to i64 460; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]] 461; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 462; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>* 463; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4 464; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 465; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 466; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 467; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] 468; CHECK: middle.block: 469; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] 470; CHECK-NEXT: br i1 
[[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 471; CHECK: scalar.ph: 472; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 473; CHECK-NEXT: br label [[FOR_COND:%.*]] 474; CHECK: for.cond: 475; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 476; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 477; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] 478; CHECK: for.body: 479; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 480; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 481; CHECK-NEXT: store i16 0, i16* [[B]], align 4 482; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 483; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 484; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]] 485; CHECK: if.end: 486; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ], [ 0, [[MIDDLE_BLOCK]] ] 487; CHECK-NEXT: ret i32 [[EXIT]] 488; 489; TAILFOLD-LABEL: @multiple_unique_exit3( 490; TAILFOLD-NEXT: entry: 491; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 492; TAILFOLD: for.cond: 493; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 494; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 495; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 496; TAILFOLD: for.body: 497; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 498; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 499; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 500; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 501; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 502; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 503; TAILFOLD: if.end: 504; TAILFOLD-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 505; TAILFOLD-NEXT: ret i32 [[EXIT]] 506; 
507entry: 508 br label %for.cond 509 510for.cond: 511 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 512 %cmp = icmp slt i32 %i, %n 513 br i1 %cmp, label %for.body, label %if.end 514 515for.body: 516 %iprom = sext i32 %i to i64 517 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 518 store i16 0, i16* %b, align 4 519 %inc = add nsw i32 %i, 1 520 %cmp2 = icmp slt i32 %i, 2096 521 br i1 %cmp2, label %for.cond, label %if.end 522 523if.end: 524 %exit = phi i32 [0, %for.cond], [1, %for.body] 525 ret i32 %exit 526} 527 528; multiple exits w/distinct target blocks 529define i32 @multiple_exit_blocks(i16* %p, i32 %n) { 530; CHECK-LABEL: @multiple_exit_blocks( 531; CHECK-NEXT: entry: 532; CHECK-NEXT: br label [[FOR_COND:%.*]] 533; CHECK: for.cond: 534; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 535; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 536; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 537; CHECK: for.body: 538; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 539; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 540; CHECK-NEXT: store i16 0, i16* [[B]], align 4 541; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 542; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 543; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 544; CHECK: if.end: 545; CHECK-NEXT: ret i32 0 546; CHECK: if.end2: 547; CHECK-NEXT: ret i32 1 548; 549; TAILFOLD-LABEL: @multiple_exit_blocks( 550; TAILFOLD-NEXT: entry: 551; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 552; TAILFOLD: for.cond: 553; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 554; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 555; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 556; TAILFOLD: for.body: 557; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 558; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, 
i16* [[P:%.*]], i64 [[IPROM]] 559; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 560; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 561; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 562; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 563; TAILFOLD: if.end: 564; TAILFOLD-NEXT: ret i32 0 565; TAILFOLD: if.end2: 566; TAILFOLD-NEXT: ret i32 1 567; 568entry: 569 br label %for.cond 570 571for.cond: 572 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 573 %cmp = icmp slt i32 %i, %n 574 br i1 %cmp, label %for.body, label %if.end 575 576for.body: 577 %iprom = sext i32 %i to i64 578 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 579 store i16 0, i16* %b, align 4 580 %inc = add nsw i32 %i, 1 581 %cmp2 = icmp slt i32 %i, 2096 582 br i1 %cmp2, label %for.cond, label %if.end2 583 584if.end: 585 ret i32 0 586 587if.end2: 588 ret i32 1 589} 590 591; unique exit case but with a switch as two edges between the same pair of 592; blocks is an often missed edge case 593define i32 @multiple_exit_switch(i16* %p, i32 %n) { 594; CHECK-LABEL: @multiple_exit_switch( 595; CHECK-NEXT: entry: 596; CHECK-NEXT: br label [[FOR_COND:%.*]] 597; CHECK: for.cond: 598; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 599; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 600; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 601; CHECK-NEXT: store i16 0, i16* [[B]], align 4 602; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 603; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 604; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 605; CHECK-NEXT: i32 2097, label [[IF_END]] 606; CHECK-NEXT: ] 607; CHECK: if.end: 608; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 609; CHECK-NEXT: ret i32 [[I_LCSSA]] 610; 611; TAILFOLD-LABEL: @multiple_exit_switch( 612; TAILFOLD-NEXT: entry: 613; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 614; TAILFOLD: for.cond: 615; TAILFOLD-NEXT: 
[[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 616; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 617; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 618; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 619; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 620; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 621; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 622; TAILFOLD-NEXT: i32 2097, label [[IF_END]] 623; TAILFOLD-NEXT: ] 624; TAILFOLD: if.end: 625; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 626; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 627; 628entry: 629 br label %for.cond 630 631for.cond: 632 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 633 %iprom = sext i32 %i to i64 634 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 635 store i16 0, i16* %b, align 4 636 %inc = add nsw i32 %i, 1 637 switch i32 %i, label %for.cond [ 638 i32 2096, label %if.end 639 i32 2097, label %if.end 640 ] 641 642if.end: 643 ret i32 %i 644} 645 646; multiple exit case but with a switch as multiple exiting edges from 647; a single block is a commonly missed edge case 648define i32 @multiple_exit_switch2(i16* %p, i32 %n) { 649; CHECK-LABEL: @multiple_exit_switch2( 650; CHECK-NEXT: entry: 651; CHECK-NEXT: br label [[FOR_COND:%.*]] 652; CHECK: for.cond: 653; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 654; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 655; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 656; CHECK-NEXT: store i16 0, i16* [[B]], align 4 657; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 658; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 659; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 660; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] 661; CHECK-NEXT: ] 662; CHECK: if.end: 663; CHECK-NEXT: ret i32 0 664; CHECK: if.end2: 665; CHECK-NEXT: ret i32 1 666; 667; TAILFOLD-LABEL: 
@multiple_exit_switch2( 668; TAILFOLD-NEXT: entry: 669; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 670; TAILFOLD: for.cond: 671; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 672; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 673; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 674; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 675; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 676; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 677; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 678; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] 679; TAILFOLD-NEXT: ] 680; TAILFOLD: if.end: 681; TAILFOLD-NEXT: ret i32 0 682; TAILFOLD: if.end2: 683; TAILFOLD-NEXT: ret i32 1 684; 685entry: 686 br label %for.cond 687 688for.cond: 689 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 690 %iprom = sext i32 %i to i64 691 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 692 store i16 0, i16* %b, align 4 693 %inc = add nsw i32 %i, 1 694 switch i32 %i, label %for.cond [ 695 i32 2096, label %if.end 696 i32 2097, label %if.end2 697 ] 698 699if.end: 700 ret i32 0 701 702if.end2: 703 ret i32 1 704} 705 706define i32 @multiple_latch1(i16* %p) { 707; CHECK-LABEL: @multiple_latch1( 708; CHECK-NEXT: entry: 709; CHECK-NEXT: br label [[FOR_BODY:%.*]] 710; CHECK: for.body: 711; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 712; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 713; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 714; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 715; CHECK: for.second: 716; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 717; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 718; CHECK-NEXT: store i16 0, i16* [[B]], align 4 719; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 720; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label 
[[FOR_END:%.*]] 721; CHECK: for.body.backedge: 722; CHECK-NEXT: br label [[FOR_BODY]] 723; CHECK: for.end: 724; CHECK-NEXT: ret i32 0 725; 726; TAILFOLD-LABEL: @multiple_latch1( 727; TAILFOLD-NEXT: entry: 728; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 729; TAILFOLD: for.body: 730; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 731; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 732; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 733; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 734; TAILFOLD: for.second: 735; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 736; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 737; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 738; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 739; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 740; TAILFOLD: for.body.backedge: 741; TAILFOLD-NEXT: br label [[FOR_BODY]] 742; TAILFOLD: for.end: 743; TAILFOLD-NEXT: ret i32 0 744; 745entry: 746 br label %for.body 747 748for.body: 749 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge] 750 %inc = add nsw i32 %i.02, 1 751 %cmp = icmp slt i32 %inc, 16 752 br i1 %cmp, label %for.body.backedge, label %for.second 753 754for.second: 755 %iprom = sext i32 %i.02 to i64 756 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 757 store i16 0, i16* %b, align 4 758 %cmps = icmp sgt i32 %inc, 16 759 br i1 %cmps, label %for.body.backedge, label %for.end 760 761for.body.backedge: 762 br label %for.body 763 764for.end: 765 ret i32 0 766} 767 768 769; two back branches - loop simplify will convert this to the same form 770; as previous before vectorizer sees it, but show that. 
771define i32 @multiple_latch2(i16* %p) { 772; CHECK-LABEL: @multiple_latch2( 773; CHECK-NEXT: entry: 774; CHECK-NEXT: br label [[FOR_BODY:%.*]] 775; CHECK: for.body: 776; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 777; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 778; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 779; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 780; CHECK: for.body.backedge: 781; CHECK-NEXT: br label [[FOR_BODY]] 782; CHECK: for.second: 783; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 784; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 785; CHECK-NEXT: store i16 0, i16* [[B]], align 4 786; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 787; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 788; CHECK: for.end: 789; CHECK-NEXT: ret i32 0 790; 791; TAILFOLD-LABEL: @multiple_latch2( 792; TAILFOLD-NEXT: entry: 793; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 794; TAILFOLD: for.body: 795; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 796; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 797; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 798; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 799; TAILFOLD: for.body.backedge: 800; TAILFOLD-NEXT: br label [[FOR_BODY]] 801; TAILFOLD: for.second: 802; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 803; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 804; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 805; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 806; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 807; TAILFOLD: for.end: 808; TAILFOLD-NEXT: ret i32 0 809; 810entry: 811 br label %for.body 812 813for.body: 814 %i.02 = phi i32 [ 0, %entry ], [ 
%inc, %for.body ], [%inc, %for.second] 815 %inc = add nsw i32 %i.02, 1 816 %cmp = icmp slt i32 %inc, 16 817 br i1 %cmp, label %for.body, label %for.second 818 819for.second: 820 %iprom = sext i32 %i.02 to i64 821 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 822 store i16 0, i16* %b, align 4 823 %cmps = icmp sgt i32 %inc, 16 824 br i1 %cmps, label %for.body, label %for.end 825 826for.end: 827 ret i32 0 828} 829 830 831; Check interaction between block predication and early exits. We need the 832; condition on the early exit to remain dead (i.e. not be used when forming 833; the predicate mask). 834define void @scalar_predication(float* %addr) { 835; CHECK-LABEL: @scalar_predication( 836; CHECK-NEXT: entry: 837; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 838; CHECK: vector.ph: 839; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 840; CHECK: vector.body: 841; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 842; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 843; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 844; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] 845; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 846; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>* 847; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 848; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 849; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true> 850; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 851; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 852; CHECK: pred.store.if: 853; CHECK-NEXT: store float 1.000000e+01, float* [[TMP1]], align 4 854; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] 855; 
CHECK: pred.store.continue: 856; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 857; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 858; CHECK: pred.store.if1: 859; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 860; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]] 861; CHECK-NEXT: store float 1.000000e+01, float* [[TMP9]], align 4 862; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] 863; CHECK: pred.store.continue2: 864; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 865; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 866; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 867; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] 868; CHECK: middle.block: 869; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200 870; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 871; CHECK: scalar.ph: 872; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 873; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 874; CHECK: loop.header: 875; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 876; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]] 877; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 878; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY:%.*]] 879; CHECK: loop.body: 880; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[GEP]], align 4 881; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00 882; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 883; CHECK: then: 884; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 885; CHECK-NEXT: br label [[LOOP_LATCH]] 886; CHECK: loop.latch: 887; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 888; CHECK-NEXT: br label [[LOOP_HEADER]], 
[[LOOP13:!llvm.loop !.*]] 889; CHECK: exit: 890; CHECK-NEXT: ret void 891; 892; TAILFOLD-LABEL: @scalar_predication( 893; TAILFOLD-NEXT: entry: 894; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 895; TAILFOLD: loop.header: 896; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 897; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]] 898; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 899; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 900; TAILFOLD: loop.body: 901; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4 902; TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 903; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 904; TAILFOLD: then: 905; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 906; TAILFOLD-NEXT: br label [[LOOP_LATCH]] 907; TAILFOLD: loop.latch: 908; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 909; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 910; TAILFOLD: exit: 911; TAILFOLD-NEXT: ret void 912; 913entry: 914 br label %loop.header 915 916loop.header: 917 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 918 %gep = getelementptr float, float* %addr, i64 %iv 919 %exitcond.not = icmp eq i64 %iv, 200 920 br i1 %exitcond.not, label %exit, label %loop.body 921 922loop.body: 923 %0 = load float, float* %gep, align 4 924 %pred = fcmp oeq float %0, 0.0 925 br i1 %pred, label %loop.latch, label %then 926 927then: 928 store float 10.0, float* %gep, align 4 929 br label %loop.latch 930 931loop.latch: 932 %iv.next = add nuw nsw i64 %iv, 1 933 br label %loop.header 934 935exit: 936 ret void 937} 938 939define i32 @me_reduction(i32* %addr) { 940; CHECK-LABEL: @me_reduction( 941; CHECK-NEXT: entry: 942; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 943; CHECK: vector.ph: 944; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 945; 
CHECK: vector.body: 946; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 947; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 948; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 949; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 950; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 951; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] 952; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 953; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 954; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 955; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 956; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 957; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 958; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 959; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] 960; CHECK: middle.block: 961; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef> 962; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP5]], [[RDX_SHUF]] 963; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[BIN_RDX]], i32 0 964; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200 965; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 966; CHECK: scalar.ph: 967; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 968; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 969; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 970; CHECK: loop.header: 971; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 972; CHECK-NEXT: 
[[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 973; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]] 974; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 975; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]] 976; CHECK: loop.latch: 977; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 978; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] 979; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 980; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 981; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop !.*]] 982; CHECK: exit: 983; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 984; CHECK-NEXT: ret i32 [[LCSSA]] 985; 986; TAILFOLD-LABEL: @me_reduction( 987; TAILFOLD-NEXT: entry: 988; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 989; TAILFOLD: loop.header: 990; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 991; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 992; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 993; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 994; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 995; TAILFOLD: loop.latch: 996; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 997; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 998; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 999; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1000; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] 1001; TAILFOLD: exit: 1002; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1003; TAILFOLD-NEXT: ret i32 
[[LCSSA]] 1004; 1005entry: 1006 br label %loop.header 1007 1008loop.header: 1009 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1010 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1011 %gep = getelementptr i32, i32* %addr, i64 %iv 1012 %exitcond.not = icmp eq i64 %iv, 200 1013 br i1 %exitcond.not, label %exit, label %loop.latch 1014 1015loop.latch: 1016 %0 = load i32, i32* %gep, align 4 1017 %accum.next = add i32 %accum, %0 1018 %iv.next = add nuw nsw i64 %iv, 1 1019 %exitcond2.not = icmp eq i64 %iv, 400 1020 br i1 %exitcond2.not, label %exit, label %loop.header 1021 1022exit: 1023 %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch] 1024 ret i32 %lcssa 1025} 1026 1027; TODO: The current definition of reduction is too strict, we can vectorize 1028; this. There's an analogous single exit case where we extract the N-1 1029; value of the reduction that we can also handle. If we fix the latter, the 1030; multiple exit case probably falls out. 1031define i32 @me_reduction2(i32* %addr) { 1032; CHECK-LABEL: @me_reduction2( 1033; CHECK-NEXT: entry: 1034; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1035; CHECK: loop.header: 1036; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1037; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1038; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1039; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1040; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1041; CHECK: loop.latch: 1042; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1043; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1044; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1045; CHECK-NEXT: br label [[LOOP_HEADER]] 1046; CHECK: exit: 1047; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1048; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] 1049; 1050; 
TAILFOLD-LABEL: @me_reduction2( 1051; TAILFOLD-NEXT: entry: 1052; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1053; TAILFOLD: loop.header: 1054; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1055; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1056; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1057; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1058; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1059; TAILFOLD: loop.latch: 1060; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1061; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1062; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1063; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1064; TAILFOLD: exit: 1065; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1066; TAILFOLD-NEXT: ret i32 [[ACCUM_LCSSA]] 1067; 1068entry: 1069 br label %loop.header 1070 1071loop.header: 1072 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1073 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1074 %gep = getelementptr i32, i32* %addr, i64 %iv 1075 %exitcond.not = icmp eq i64 %iv, 200 1076 br i1 %exitcond.not, label %exit, label %loop.latch 1077 1078loop.latch: 1079 %0 = load i32, i32* %gep, align 4 1080 %accum.next = add i32 %accum, %0 1081 %iv.next = add nuw nsw i64 %iv, 1 1082 br label %loop.header 1083 1084exit: 1085 ret i32 %accum 1086} 1087 1088