; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test base heuristic 1:
;; highly-biased selects assumed to be highly predictable, converted to branches
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; If a select is obviously predictable, turn it into a branch.
; The select carries branch weights !15; the expected output freezes the
; condition and branches on it, replacing the select with a phi.
define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @weighted_select1(
; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]]
; CHECK:       select.false:
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ret i32 %sel
}

; If a select is obviously predictable (reversed profile weights),
; turn it into a branch.
define i32 @weighted_select2(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @weighted_select2(
; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]]
; CHECK:       select.false:
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
  ret i32 %sel
}

; Not obviously predictable select.
define i32 @weighted_select3(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @weighted_select3(
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF18:![0-9]+]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
  ret i32 %sel
}

; Unpredictable select should not form a branch.
; The metadata id in the expected output is matched with a pattern variable
; rather than a literal number so that metadata renumbering cannot break it.
define i32 @unpred_select(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @unpred_select(
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable [[META19:![0-9]+]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !unpredictable !20
  ret i32 %sel
}

; Predictable select in function with optsize attribute should not form branch.
define i32 @weighted_select_optsize(i32 %a, i32 %b, i1 %cmp) optsize {
; CHECK-LABEL: @weighted_select_optsize(
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ret i32 %sel
}

; Predictable select in a function whose profile entry count is zero (!14)
; should not form a branch.
; NOTE(review): presumably this exercises profile-guided size optimization,
; as the "pgso" suffix suggests -- confirm against the pass.
define i32 @weighted_select_pgso(i32 %a, i32 %b, i1 %cmp) !prof !14 {
; CHECK-LABEL: @weighted_select_pgso(
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ret i32 %sel
}

; If two selects in a row are predictable, turn them into branches.
define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 {
; CHECK-LABEL: @weighted_selects(
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP]]
; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
; CHECK:       select.false:
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0
; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP1]]
; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]]
; CHECK:       select.false2:
; CHECK-NEXT:    br label [[SELECT_END1]]
; CHECK:       select.end1:
; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[B]], [[SELECT_END]] ], [ [[A]], [[SELECT_FALSE2]] ]
; CHECK-NEXT:    ret i32 [[SEL1]]
;
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ; The second select's condition depends on the result of the first one.
  %cmp1 = icmp ne i32 %sel, 0
  %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !15
  ret i32 %sel1
}

; If a select group is predictable, turn it into a branch.
; Both selects share the condition %cmp; a debug intrinsic sits between them.
; The expected output uses a single branch with one phi per select and sinks
; the one-use adds (%c1, %b1) into the branch targets.
define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 {
; CHECK-LABEL: @weighted_select_group(
; CHECK-NEXT:    [[A1:%.*]] = add i32 [[A:%.*]], 1
; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]]
; CHECK:       select.true.sink:
; CHECK-NEXT:    [[C1:%.*]] = add i32 [[C:%.*]], 1
; CHECK-NEXT:    br label [[SELECT_END:%.*]]
; CHECK:       select.false.sink:
; CHECK-NEXT:    [[B1:%.*]] = add i32 [[B:%.*]], 1
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ]
; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ]
; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %a1 = add i32 %a, 1
  %b1 = add i32 %b, 1
  %c1 = add i32 %c, 1
  %sel1 = select i1 %cmp, i32 %a1, i32 %b1, !prof !15
  call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23)
  %sel2 = select i1 %cmp, i32 %c1, i32 %a1, !prof !15
  %add = add i32 %sel1, %sel2
  ret i32 %add
}

; Predictable select group with intra-group dependence converted to branch
; (%sel2 uses %sel1 as its false operand).
define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
; CHECK-LABEL: @select_group_intra_group(
; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
; CHECK:       select.false:
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[B]], [[SELECT_FALSE]] ]
; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[SUB]]
;
  %sel1 = select i1 %cmp, i32 %a, i32 %b, !prof !15
  %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !15
  %sub = sub i32 %sel1, %sel2
  ret i32 %sub
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test base heuristic 2:
;; look for expensive instructions in the one-use slice of the cold path
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Select with cold one-use load value operand should form branch and
; sink load
define i32 @expensive_val_operand1(ptr nocapture %a, i32 %y, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand1(
; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
; CHECK:       select.true.sink:
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[LOAD]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %load = load i32, ptr %a, align 8
  %sel = select i1 %cmp, i32 %load, i32 %y, !prof !17
  ret i32 %sel
}

; Expensive hot value operand and cheap cold value operand.
; The load is the false operand of a select weighted by !17; the expected
; output keeps the select unchanged.
define i32 @expensive_val_operand2(ptr nocapture %a, i32 %x, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand2(
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]], !prof [[PROF18]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %load = load i32, ptr %a, align 8
  %sel = select i1 %cmp, i32 %x, i32 %load, !prof !17
  ret i32 %sel
}

; Cold value operand with a load in its one-use dependence slice should result
; in a branch with the dependence slice sunk into it.
define i32 @expensive_val_operand3(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand3(
; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
; CHECK:       select.true.sink:
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %load = load i32, ptr %a, align 8
  %x = add i32 %load, %b
  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
  ret i32 %sel
}

; Multiple uses of the load value operand.
; %load feeds both the select and the final add, and the select has no
; profile metadata; the expected output keeps the select unchanged.
define i32 @expensive_val_operand4(i32 %a, ptr nocapture %b, i32 %x, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand4(
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]]
; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL]], [[LOAD]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
  %load = load i32, ptr %b, align 4
  %sel = select i1 %cmp, i32 %x, i32 %load
  %add = add i32 %sel, %load
  ret i32 %add
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test loop heuristic: loop-level critical-path analysis
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Use of cmov in this test would put a load and a fsub on the critical path.
;; Loop-level analysis should decide to form a branch.
;;
;;double cmov_on_critical_path(int n, double x, double *a) {
;;  for (int i = 0; i < n; i++) {
;;    double r = a[i];
;;    if (x > r)
;;      // 50% of iterations
;;      x -= r;
;;  }
;;  return x;
;;}
define double @cmov_on_critical_path(i32 %n, double %x, ptr nocapture %a) {
; CHECK-LABEL: @cmov_on_critical_path(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret double [[X:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]]
; CHECK-NEXT:    [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]]
; CHECK-NEXT:    br i1 [[X2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]]
; CHECK:       select.true.sink:
; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[X1]], [[R]]
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[X2]] = phi double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
; CHECK:       for.exit:
; CHECK-NEXT:    ret double [[X2]]
;
entry:
  %cmp1 = icmp sgt i32 %n, 0
  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %entry
  ret double %x

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  %r = load double, ptr %arrayidx, align 8
  %sub = fsub double %x1, %r
  %cmp2 = fcmp ogt double %x1, %r
  ; %x2 feeds the %x1 phi, so this select is part of the loop-carried chain.
  %x2 = select i1 %cmp2, double %sub, double %x1, !prof !18
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:                                         ; preds = %for.body
  ret double %x2
}

;; The common path includes expensive operations (load and fsub) making
;; branch similarly expensive to cmov, and thus the gain is small.
;; Loop-level analysis should decide on not forming a branch.
;;
;;double small_gain(int n, double x, double *a) {
;;  for (int i = 0; i < n; i++) {
;;    double r = a[i];
;;    if (x > r)
;;      // 99% of iterations
;;      x -= r;
;;  }
;;  return x;
;;}
define double @small_gain(i32 %n, double %x, ptr nocapture %a) {
; CHECK-LABEL: @small_gain(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret double [[X:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[X1:%.*]] = phi double [ [[X2:%.*]], [[FOR_BODY]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[X1]], [[R]]
; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ole double [[X1]], [[R]]
; CHECK-NEXT:    [[X2]] = select i1 [[CMP2]], double [[X1]], double [[SUB]], !prof [[PROF18]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
; CHECK:       for.exit:
; CHECK-NEXT:    ret double [[X2]]
;
entry:
  %cmp1 = icmp sgt i32 %n, 0
  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %entry
  ret double %x

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  %r = load double, ptr %arrayidx, align 8
  %sub = fsub double %x1, %r
  ; The comparison is inverted relative to the C sketch (ole instead of ogt),
  ; with the select operands swapped to match.
  %cmp2 = fcmp ole double %x1, %r
  %x2 = select i1 %cmp2, double %x1, double %sub, !prof !17
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:                                         ; preds = %for.body
  ret double %x2
}

;; Use of a branch in this test would avoid executing a load and several
;; floating-point operations for most cases (70% of the time).
;; Yet, the gain is not increasing much per iteration (small gradient gain).
;; Loop-level analysis should decide not to form a branch.
;;
;;double small_gradient(int n, double x, double *a) {
;;  for (int i = 0; i < n; i++) {
;;    double r = 2 * a[i] + i;
;;    if (r > 0)
;;      // 30% of iterations
;;      x -= r;
;;  }
;;  return x;
;;}
;;
;; NOTE(review): the IR below passes the constant 1.0 as the fmuladd addend,
;; whereas the C sketch above adds the loop index `i` -- confirm which one is
;; intended.
define double @small_gradient(i32 %n, double %x, ptr nocapture %a) {
; CHECK-LABEL: @small_gradient(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_ADDR_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret double [[X_ADDR_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[X_ADDR_010:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fmuladd.f64(double [[TMP0]], double 2.000000e+00, double 1.000000e+00)
; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[TMP1]], 0.000000e+00
; CHECK-NEXT:    [[SUB:%.*]] = select i1 [[CMP1]], double [[TMP1]], double 0.000000e+00, !prof [[PROF28:![0-9]+]]
; CHECK-NEXT:    [[X_ADDR_1]] = fsub double [[X_ADDR_010]], [[SUB]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
;
entry:
  %cmp8 = icmp sgt i32 %n, 0
  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.body ]
  ret double %x.addr.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %x.addr.010 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 8
  %1 = call double @llvm.fmuladd.f64(double %0, double 2.000000e+00, double 1.000000e+00)
  %cmp1 = fcmp ogt double %1, 0.000000e+00
  %sub = select i1 %cmp1, double %1, double 0.000000e+00, !prof !28
  %x.addr.1 = fsub double %x.addr.010, %sub
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

;; One select on the critical path and one off the critical path.
;; Loop-level analysis should decide to form a branch only for
;; the select on the critical path.
;;
;;double loop_select_groups(int n, double x, double *a, int k) {
;;  int c = 0;
;;  for (int i = 0; i < n; i++) {
;;    double r = a[i];
;;    if (x > r)
;;      x -= r;
;;    if (i == k)
;;      c += n;
;;  }
;;  return x + c;
;;}
;;
;; NOTE(review): the C sketch tests `i == k`, but the IR below compares %k with
;; %n and leaves the truncated induction value %1 unused -- confirm that this
;; is intended.
define double @loop_select_groups(i32 %n, double %x, ptr nocapture %a, i32 %k) {
; CHECK-LABEL: @loop_select_groups(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP19:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup.loopexit:
; CHECK-NEXT:    [[PHI_CAST:%.*]] = sitofp i32 [[C_1:%.*]] to double
; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[C_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[PHI_CAST]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT:    [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY]] ], [ [[X_ADDR_1:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[X_ADDR_0_LCSSA]], [[C_0_LCSSA]]
; CHECK-NEXT:    ret double [[ADD5]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
; CHECK-NEXT:    [[X_ADDR_022:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[SELECT_END]] ]
; CHECK-NEXT:    [[C_020:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[C_1]], [[SELECT_END]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[X_ADDR_022]], [[TMP0]]
; CHECK-NEXT:    [[SUB_FROZEN:%.*]] = freeze i1 [[CMP1]]
; CHECK-NEXT:    br i1 [[SUB_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
; CHECK:       select.false:
; CHECK-NEXT:    br label [[SELECT_END]]
; CHECK:       select.end:
; CHECK-NEXT:    [[SUB:%.*]] = phi double [ [[TMP0]], [[FOR_BODY]] ], [ 0.000000e+00, [[SELECT_FALSE]] ]
; CHECK-NEXT:    [[X_ADDR_1]] = fsub double [[X_ADDR_022]], [[SUB]]
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[K:%.*]], [[N]]
; CHECK-NEXT:    [[ADD:%.*]] = select i1 [[CMP2]], i32 [[N]], i32 0
; CHECK-NEXT:    [[C_1]] = add nsw i32 [[ADD]], [[C_020]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
entry:
  %cmp19 = icmp sgt i32 %n, 0
  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %phi.cast = sitofp i32 %c.1 to double
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %c.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %phi.cast, %for.cond.cleanup.loopexit ]
  %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.cond.cleanup.loopexit ]
  %add5 = fadd double %x.addr.0.lcssa, %c.0.lcssa
  ret double %add5

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %x.addr.022 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ]
  %c.020 = phi i32 [ 0, %for.body.preheader ], [ %c.1, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 8
  %cmp1 = fcmp ogt double %x.addr.022, %0
  ; First select: its result feeds the loop-carried %x.addr.1 value.
  %sub = select i1 %cmp1, double %0, double 0.000000e+00
  %x.addr.1 = fsub double %x.addr.022, %sub
  %1 = trunc i64 %indvars.iv to i32
  %cmp2 = icmp eq i32 %k, %n
  ; Second select: only feeds the %c.1 accumulator.
  %add = select i1 %cmp2, i32 %n, i32 0
  %c.1 = add nsw i32 %add, %c.020
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
}

; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata)

; Function Attrs: mustprogress nofree nosync nounwind readnone speculatable willreturn
declare double @llvm.fmuladd.f64(double, double, double)

; Module flags: profile summary (!0), DWARF version (!26), debug-info version (!27).
!llvm.module.flags = !{!0, !26, !27}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Function entry counts and select branch weights referenced by the tests.
!14 = !{!"function_entry_count", i64 0}
!15 = !{!"branch_weights", i32 1, i32 100}
!16 = !{!"branch_weights", i32 100, i32 1}
!17 = !{!"branch_weights", i32 1, i32 99}
!18 = !{!"branch_weights", i32 50, i32 50}
!19 = !{!"function_entry_count", i64 100}
!20 = !{}
; Debug-info nodes used by the llvm.dbg.value call in @weighted_select_group.
!21 = !DIFile(filename: "test.c", directory: "/test")
!22 = distinct !DICompileUnit(language: DW_LANG_C99, file: !21, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !25, splitDebugInlining: false, nameTableKind: None)
!23 = distinct !DISubprogram(name: "test", scope: !21, file: !21, line: 1, unit: !22)
!24 = !DILocalVariable(name: "x", scope: !23)
!25 = !{}
!26 = !{i32 2, !"Dwarf Version", i32 4}
!27 = !{i32 1, !"Debug Info Version", i32 3}
!28 = !{!"branch_weights", i32 30, i32 70}