1; 2; Here we have 5-way unswitchable switch with each successor also having an unswitchable 3; exiting branch in it. If we start unswitching those branches we start duplicating the 4; whole switch. This can easily lead to exponential behavior w/o proper control. 5; On a real-life testcase there was 16-way switch and that took forever to compile w/o 6; a cost control. 7; 8; 9; When we use the stricted multiplier candidates formula (unscaled candidates == 0) 10; we should be getting just a single loop. 11; 12; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 13; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \ 14; RUN: -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 15; 16; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 17; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \ 18; RUN: -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 19; 20; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 21; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=1 \ 22; RUN: -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 23; 24; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 25; RUN: -unswitch-num-initial-unscaled-candidates=0 -unswitch-siblings-toplevel-div=16 \ 26; RUN: -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | FileCheck %s --check-prefixes=LOOP1 27; 28; With relaxed candidates multiplier (unscaled candidates == 8) we should allow 29; some unswitches to happen until siblings multiplier starts kicking in: 30; 31; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 32; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \ 33; RUN: -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \ 34; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX 35; 36; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 37; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=1 \ 38; RUN: -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \ 39; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX 40; 41; With relaxed candidates multiplier (unscaled candidates == 8) and with relaxed 42; siblings multiplier for top-level loops (toplevel-div == 8) we should get 43; considerably more copies of the loop (especially top-level ones). 44; 45; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 46; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \ 47; RUN: -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \ 48; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX2 49; 50; RUN: opt < %s -enable-unswitch-cost-multiplier=true \ 51; RUN: -unswitch-num-initial-unscaled-candidates=8 -unswitch-siblings-toplevel-div=8 \ 52; RUN: -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \ 53; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-RELAX2 54; 55; We get hundreds of copies of the loop when cost multiplier is disabled: 56; 57; RUN: opt < %s -enable-unswitch-cost-multiplier=false \ 58; RUN: -passes='loop(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \ 59; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX 60; 61; RUN: opt < %s -enable-unswitch-cost-multiplier=false \ 62; RUN: -passes='loop-mssa(unswitch<nontrivial>),print<loops>' -disable-output 2>&1 | \ 63; RUN: sort -b -k 1 | FileCheck %s --check-prefixes=LOOP-MAX 64 65; Single loop nest, not unswitched 66; LOOP1: Loop at depth 1 containing: 67; LOOP1-NOT: Loop at depth 1 containing: 68; LOOP1: Loop at depth 2 containing: 69; LOOP1-NOT: Loop at depth 2 containing: 70; 71; Somewhat relaxed restrictions on candidates: 72; LOOP-RELAX-COUNT-5: Loop at depth 1 containing: 73; LOOP-RELAX-NOT: Loop at depth 1 containing: 74; LOOP-RELAX-COUNT-32: Loop at depth 2 containing: 75; LOOP-RELAX-NOT: Loop at depth 2 containing: 76; 77; Even more relaxed restrictions on candidates and siblings. 78; LOOP-RELAX2-COUNT-11: Loop at depth 1 containing: 79; LOOP-RELAX2-NOT: Loop at depth 1 containing: 80; LOOP-RELAX2-COUNT-40: Loop at depth 2 containing: 81; LOOP-RELAX-NOT: Loop at depth 2 containing: 82; 83; Unswitched as much as it could (with multiplier disabled). 84; LOOP-MAX-COUNT-56: Loop at depth 1 containing: 85; LOOP-MAX-NOT: Loop at depth 1 containing: 86; LOOP-MAX-COUNT-111: Loop at depth 2 containing: 87; LOOP-MAX-NOT: Loop at depth 2 containing: 88 89define i32 @loop_switch(i32* %addr, i32 %c1, i32 %c2) { 90entry: 91 %addr1 = getelementptr i32, i32* %addr, i64 0 92 %addr2 = getelementptr i32, i32* %addr, i64 1 93 %check0 = icmp eq i32 %c2, 0 94 %check1 = icmp eq i32 %c2, 31 95 %check2 = icmp eq i32 %c2, 32 96 %check3 = icmp eq i32 %c2, 33 97 %check4 = icmp eq i32 %c2, 34 98 br label %outer_loop 99 100outer_loop: 101 %iv1 = phi i32 [0, %entry], [%iv1.next, %outer_latch] 102 %iv1.next = add i32 %iv1, 1 103 br label %inner_loop 104inner_loop: 105 %iv2 = phi i32 [0, %outer_loop], [%iv2.next, %inner_latch] 106 %iv2.next = add i32 %iv2, 1 107 switch i32 %c1, label %inner_latch [ 108 i32 0, label %case0 109 i32 1, label %case1 110 i32 2, label %case2 111 i32 3, label %case3 112 i32 4, label %case4 113 ] 114 115case4: 116 br i1 %check4, label %exit, label %inner_latch 117case3: 118 br i1 %check3, label %exit, label %inner_latch 119case2: 120 br i1 %check2, label %exit, label %inner_latch 121case1: 122 br i1 %check1, label %exit, label %inner_latch 123case0: 124 br i1 %check0, label %exit, label %inner_latch 125 126inner_latch: 127 store volatile i32 0, i32* %addr1 128 %test_inner = icmp slt i32 %iv2, 50 129 br i1 %test_inner, label %inner_loop, label %outer_latch 130 131outer_latch: 132 store volatile i32 0, i32* %addr2 133 %test_outer = icmp slt i32 %iv1, 50 134 br i1 %test_outer, label %outer_loop, label %exit 135 136exit: ; preds = %bci_0 137 ret i32 1 138} 139