; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s --check-prefix=NOREGS

; Test the register stackifier pass.

; We have two sets of tests, one with registers and implicit locals, and
; a stack / explicit locals based version (NOREGS).

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; No because of pointer aliasing.

; CHECK-LABEL: no0:
; CHECK: return $1{{$}}
; NOREGS-LABEL: no0:
; NOREGS: return{{$}}
define i32 @no0(i32* %p, i32* %q) {
  %t = load i32, i32* %q
  store i32 0, i32* %p
  ret i32 %t
}

; No because of side effects: both the load and the store are volatile.

; CHECK-LABEL: no1:
; CHECK: return $1{{$}}
; NOREGS-LABEL: no1:
; NOREGS: return{{$}}
define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
  %t = load volatile i32, i32* %q, !invariant.load !0
  store volatile i32 0, i32* %p
  ret i32 %t
}

; Yes because of invariant load and no side effects.

; CHECK-LABEL: yes0:
; CHECK: return $pop{{[0-9]+}}{{$}}
; NOREGS-LABEL: yes0:
; NOREGS: return{{$}}
define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
  %t = load i32, i32* %q, !invariant.load !0
  store i32 0, i32* %p
  ret i32 %t
}

; Yes because of no intervening side effects (the volatile load feeds the
; return directly).

; CHECK-LABEL: yes1:
; CHECK: return $pop0{{$}}
; NOREGS-LABEL: yes1:
; NOREGS: return{{$}}
define i32 @yes1(i32* %q) {
  %t = load volatile i32, i32* %q
  ret i32 %t
}

; Yes because undefined behavior can be sunk past a store.
; CHECK-LABEL: sink_trap:
; CHECK: return $pop{{[0-9]+}}{{$}}
; NOREGS-LABEL: sink_trap:
; NOREGS: return{{$}}
define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
  %t = sdiv i32 %x, %y
  store volatile i32 0, i32* %p
  ret i32 %t
}

; Yes because the call is readnone.

; CHECK-LABEL: sink_readnone_call:
; CHECK: return $pop0{{$}}
; NOREGS-LABEL: sink_readnone_call:
; NOREGS: return{{$}}
declare i32 @readnone_callee() readnone nounwind
define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
  %t = call i32 @readnone_callee()
  store volatile i32 0, i32* %p
  ret i32 %t
}

; No because the call is readonly and there's an intervening store.

; CHECK-LABEL: no_sink_readonly_call:
; CHECK: return ${{[0-9]+}}{{$}}
; NOREGS-LABEL: no_sink_readonly_call:
; NOREGS: return{{$}}
declare i32 @readonly_callee() readonly nounwind
define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
  %t = call i32 @readonly_callee()
  store i32 0, i32* %p
  ret i32 %t
}

; Don't schedule stack uses into the stack. To reduce register pressure, the
; scheduler might be tempted to move the definition of $2 down. However, this
; would risk getting incorrect liveness if the instructions are later
; rearranged to make the stack contiguous.
; CHECK-LABEL: stack_uses:
; CHECK: .param i32, i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: block {{$}}
; CHECK-NEXT: i32.const $push[[L13:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i32.lt_s $push[[L0:[0-9]+]]=, $0, $pop[[L13]]{{$}}
; CHECK-NEXT: i32.const $push[[L1:[0-9]+]]=, 2{{$}}
; CHECK-NEXT: i32.lt_s $push[[L2:[0-9]+]]=, $1, $pop[[L1]]{{$}}
; CHECK-NEXT: i32.xor $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L2]]{{$}}
; CHECK-NEXT: i32.const $push[[L12:[0-9]+]]=, 1{{$}}
; CHECK-NEXT: i32.lt_s $push[[L3:[0-9]+]]=, $2, $pop[[L12]]{{$}}
; CHECK-NEXT: i32.const $push[[L11:[0-9]+]]=, 2{{$}}
; CHECK-NEXT: i32.lt_s $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}}
; CHECK-NEXT: i32.xor $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
; CHECK-NEXT: i32.xor $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}}
; CHECK-NEXT: i32.const $push10=, 1{{$}}
; CHECK-NEXT: i32.ne $push8=, $pop7, $pop10{{$}}
; CHECK-NEXT: br_if 0, $pop8{{$}}
; CHECK-NEXT: i32.const $push9=, 0{{$}}
; CHECK-NEXT: return $pop9{{$}}
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: i32.const $push14=, 1{{$}}
; CHECK-NEXT: return $pop14{{$}}
; NOREGS-LABEL: stack_uses:
; NOREGS: .param i32, i32, i32, i32{{$}}
; NOREGS-NEXT: .result i32{{$}}
; NOREGS-NEXT: block {{$}}
; NOREGS-NEXT: get_local 0{{$}}
; NOREGS-NEXT: i32.const 1{{$}}
; NOREGS-NEXT: i32.lt_s
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: i32.const 2{{$}}
; NOREGS-NEXT: i32.lt_s
; NOREGS-NEXT: i32.xor {{$}}
; NOREGS-NEXT: get_local 2{{$}}
; NOREGS-NEXT: i32.const 1{{$}}
; NOREGS-NEXT: i32.lt_s
; NOREGS-NEXT: get_local 3{{$}}
; NOREGS-NEXT: i32.const 2{{$}}
; NOREGS-NEXT: i32.lt_s
; NOREGS-NEXT: i32.xor {{$}}
; NOREGS-NEXT: i32.xor {{$}}
; NOREGS-NEXT: i32.const 1{{$}}
; NOREGS-NEXT: i32.ne {{$}}
; NOREGS-NEXT: br_if 0{{$}}
; NOREGS-NEXT: i32.const 0{{$}}
; NOREGS-NEXT: return{{$}}
; NOREGS-NEXT: .LBB7_2:
; NOREGS-NEXT: end_block{{$}}
; NOREGS-NEXT: i32.const 1{{$}}
; NOREGS-NEXT: return{{$}}
define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
entry:
  %c = icmp sle i32 %x, 0
  %d = icmp sle i32 %y, 1
  %e = icmp sle i32 %z, 0
  %f = icmp sle i32 %w, 1
  %g = xor i1 %c, %d
  %h = xor i1 %e, %f
  %i = xor i1 %g, %h
  br i1 %i, label %true, label %false
true:
  ret i32 0
false:
  ret i32 1
}

; Test an interesting case where the load has multiple uses and cannot
; be trivially stackified. However, it can be stackified with a tee_local.

; CHECK-LABEL: multiple_uses:
; CHECK: .param i32, i32, i32{{$}}
; CHECK-NEXT: block {{$}}
; CHECK-NEXT: i32.load $push[[NUM0:[0-9]+]]=, 0($2){{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $3=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: i32.ge_u $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $1{{$}}
; CHECK-NEXT: br_if 0, $pop[[NUM2]]{{$}}
; CHECK-NEXT: i32.lt_u $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
; CHECK-NEXT: br_if 0, $pop[[NUM3]]{{$}}
; CHECK-NEXT: i32.store 0($2), $3{{$}}
; CHECK-NEXT: .LBB8_3:
; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
; NOREGS-LABEL: multiple_uses:
; NOREGS: .param i32, i32, i32{{$}}
; NOREGS: .local i32{{$}}
; NOREGS-NEXT: block {{$}}
; NOREGS-NEXT: get_local 2{{$}}
; NOREGS-NEXT: i32.load 0{{$}}
; NOREGS-NEXT: tee_local 3{{$}}
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: i32.ge_u
; NOREGS-NEXT: br_if 0{{$}}
; NOREGS-NEXT: get_local 3{{$}}
; NOREGS-NEXT: get_local 0{{$}}
; NOREGS-NEXT: i32.lt_u
; NOREGS-NEXT: br_if 0{{$}}
; NOREGS-NEXT: get_local 2{{$}}
; NOREGS-NEXT: get_local 3{{$}}
; NOREGS-NEXT: i32.store 0{{$}}
; NOREGS-NEXT: .LBB8_3:
; NOREGS-NEXT: end_block{{$}}
; NOREGS-NEXT: return{{$}}
define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
bb:
  br label %loop

loop:
  %tmp7 = load i32, i32* %arg2
  %tmp8 = inttoptr i32 %tmp7 to i32*
  %tmp9 = icmp uge i32* %tmp8, %arg1
  %tmp10 = icmp ult i32* %tmp8, %arg0
  %tmp11 = or i1 %tmp9, %tmp10
  br i1 %tmp11, label %back, label %then

then:
  store i32 %tmp7, i32* %arg2
  br label %back

back:
  br i1 undef, label %return, label %loop

return:
  ret void
}

; Don't stackify stores across other instructions with side effects.

; CHECK-LABEL: stackify_store_across_side_effects:
; CHECK: store
; CHECK-NEXT: call
; CHECK: store
; CHECK-NEXT: call
; NOREGS-LABEL: stackify_store_across_side_effects:
; NOREGS: store
; NOREGS-NEXT: call
; NOREGS: store
; NOREGS-NEXT: call
declare void @evoke_side_effects()
define hidden void @stackify_store_across_side_effects(double* nocapture %d) {
entry:
  store double 2.0, double* %d
  call void @evoke_side_effects()
  store double 2.0, double* %d
  call void @evoke_side_effects()
  ret void
}

; Div instructions have side effects and can't be reordered, but this entire
; function should still be able to be stackified because it's already in
; tree order.
; CHECK-LABEL: div_tree:
; CHECK: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.div_s $push[[L0:[0-9]+]]=, $0, $1{{$}}
; CHECK-NEXT: i32.div_s $push[[L1:[0-9]+]]=, $2, $3{{$}}
; CHECK-NEXT: i32.div_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L3:[0-9]+]]=, $4, $5{{$}}
; CHECK-NEXT: i32.div_s $push[[L4:[0-9]+]]=, $6, $7{{$}}
; CHECK-NEXT: i32.div_s $push[[L5:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L6:[0-9]+]]=, $pop[[L2]], $pop[[L5]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L7:[0-9]+]]=, $8, $9{{$}}
; CHECK-NEXT: i32.div_s $push[[L8:[0-9]+]]=, $10, $11{{$}}
; CHECK-NEXT: i32.div_s $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L10:[0-9]+]]=, $12, $13{{$}}
; CHECK-NEXT: i32.div_s $push[[L11:[0-9]+]]=, $14, $15{{$}}
; CHECK-NEXT: i32.div_s $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L13:[0-9]+]]=, $pop[[L9]], $pop[[L12]]{{$}}
; CHECK-NEXT: i32.div_s $push[[L14:[0-9]+]]=, $pop[[L6]], $pop[[L13]]{{$}}
; CHECK-NEXT: return $pop[[L14]]{{$}}
; NOREGS-LABEL: div_tree:
; NOREGS: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
; NOREGS-NEXT: .result i32{{$}}
; NOREGS-NEXT: get_local 0{{$}}
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: get_local 2{{$}}
; NOREGS-NEXT: get_local 3{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: get_local 4{{$}}
; NOREGS-NEXT: get_local 5{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: get_local 6{{$}}
; NOREGS-NEXT: get_local 7{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: get_local 8{{$}}
; NOREGS-NEXT: get_local 9{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: get_local 10{{$}}
; NOREGS-NEXT: get_local 11{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: get_local 12{{$}}
; NOREGS-NEXT: get_local 13{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: get_local 14{{$}}
; NOREGS-NEXT: get_local 15{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: i32.div_s{{$}}
; NOREGS-NEXT: return{{$}}
define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
entry:
  %div = sdiv i32 %a, %b
  %div1 = sdiv i32 %c, %d
  %div2 = sdiv i32 %div, %div1
  %div3 = sdiv i32 %e, %f
  %div4 = sdiv i32 %g, %h
  %div5 = sdiv i32 %div3, %div4
  %div6 = sdiv i32 %div2, %div5
  %div7 = sdiv i32 %i, %j
  %div8 = sdiv i32 %k, %l
  %div9 = sdiv i32 %div7, %div8
  %div10 = sdiv i32 %m, %n
  %div11 = sdiv i32 %o, %p
  %div12 = sdiv i32 %div10, %div11
  %div13 = sdiv i32 %div9, %div12
  %div14 = sdiv i32 %div6, %div13
  ret i32 %div14
}

; A simple multiple-use case.
; CHECK-LABEL: simple_multiple_use:
; CHECK: .param i32, i32{{$}}
; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: call use_a@FUNCTION, $pop[[NUM1]]{{$}}
; CHECK-NEXT: call use_b@FUNCTION, $[[NUM2]]{{$}}
; CHECK-NEXT: return{{$}}
; NOREGS-LABEL: simple_multiple_use:
; NOREGS: .param i32, i32{{$}}
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: get_local 0{{$}}
; NOREGS-NEXT: i32.mul
; NOREGS-NEXT: tee_local 1{{$}}
; NOREGS-NEXT: call use_a@FUNCTION{{$}}
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: call use_b@FUNCTION{{$}}
; NOREGS-NEXT: return{{$}}
declare void @use_a(i32)
declare void @use_b(i32)
define void @simple_multiple_use(i32 %x, i32 %y) {
  %mul = mul i32 %y, %x
  call void @use_a(i32 %mul)
  call void @use_b(i32 %mul)
  ret void
}

; Multiple uses of the same value in one instruction.

; CHECK-LABEL: multiple_uses_in_same_insn:
; CHECK: .param i32, i32{{$}}
; CHECK-NEXT: i32.mul $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: call use_2@FUNCTION, $pop[[NUM1]], $[[NUM2]]{{$}}
; CHECK-NEXT: return{{$}}
; NOREGS-LABEL: multiple_uses_in_same_insn:
; NOREGS: .param i32, i32{{$}}
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: get_local 0{{$}}
; NOREGS-NEXT: i32.mul
; NOREGS-NEXT: tee_local 1{{$}}
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: call use_2@FUNCTION{{$}}
; NOREGS-NEXT: return{{$}}
declare void @use_2(i32, i32)
define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
  %mul = mul i32 %y, %x
  call void @use_2(i32 %mul, i32 %mul)
  ret void
}

; Commute operands to achieve better stackifying.
; CHECK-LABEL: commute:
; CHECK-NOT: param
; CHECK: .result i32{{$}}
; CHECK-NEXT: i32.call $push0=, red@FUNCTION{{$}}
; CHECK-NEXT: i32.call $push1=, green@FUNCTION{{$}}
; CHECK-NEXT: i32.add $push2=, $pop0, $pop1{{$}}
; CHECK-NEXT: i32.call $push3=, blue@FUNCTION{{$}}
; CHECK-NEXT: i32.add $push4=, $pop2, $pop3{{$}}
; CHECK-NEXT: return $pop4{{$}}
; NOREGS-LABEL: commute:
; NOREGS-NOT: param
; NOREGS: .result i32{{$}}
; NOREGS-NEXT: i32.call red@FUNCTION{{$}}
; NOREGS-NEXT: i32.call green@FUNCTION{{$}}
; NOREGS-NEXT: i32.add {{$}}
; NOREGS-NEXT: i32.call blue@FUNCTION{{$}}
; NOREGS-NEXT: i32.add {{$}}
; NOREGS-NEXT: return{{$}}
declare i32 @red()
declare i32 @green()
declare i32 @blue()
define i32 @commute() {
  %call = call i32 @red()
  %call1 = call i32 @green()
  ; The first add takes its operands in the opposite order from the calls.
  %add = add i32 %call1, %call
  %call2 = call i32 @blue()
  %add3 = add i32 %add, %call2
  ret i32 %add3
}

; Don't stackify a register when it would move the def of the register past
; an implicit get_local for the register.
; CHECK-LABEL: no_stackify_past_use:
; CHECK: i32.call $1=, callee@FUNCTION, $0
; CHECK-NEXT: i32.const $push0=, 1
; CHECK-NEXT: i32.add $push1=, $0, $pop0
; CHECK-NEXT: i32.call $push2=, callee@FUNCTION, $pop1
; CHECK-NEXT: i32.sub $push3=, $pop2, $1
; CHECK-NEXT: i32.div_s $push4=, $pop3, $1
; CHECK-NEXT: return $pop4
; NOREGS-LABEL: no_stackify_past_use:
; NOREGS: get_local 0{{$}}
; NOREGS-NEXT: i32.call callee@FUNCTION
; NOREGS-NEXT: set_local 1{{$}}
; NOREGS-NEXT: get_local 0{{$}}
; NOREGS-NEXT: i32.const 1
; NOREGS-NEXT: i32.add
; NOREGS-NEXT: i32.call callee@FUNCTION
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: i32.sub
; NOREGS-NEXT: get_local 1{{$}}
; NOREGS-NEXT: i32.div_s
; NOREGS-NEXT: return
declare i32 @callee(i32)
define i32 @no_stackify_past_use(i32 %arg) {
  %tmp1 = call i32 @callee(i32 %arg)
  %tmp2 = add i32 %arg, 1
  %tmp3 = call i32 @callee(i32 %tmp2)
  %tmp5 = sub i32 %tmp3, %tmp1
  %tmp6 = sdiv i32 %tmp5, %tmp1
  ret i32 %tmp6
}

; This is the same as no_stackify_past_use, except using a commutative operator,
; so we can reorder the operands and stackify.
; CHECK-LABEL: commute_to_fix_ordering:
; CHECK: i32.call $push[[L0:.+]]=, callee@FUNCTION, $0
; CHECK: tee_local $push[[L1:.+]]=, $1=, $pop[[L0]]
; CHECK: i32.const $push0=, 1
; CHECK: i32.add $push1=, $0, $pop0
; CHECK: i32.call $push2=, callee@FUNCTION, $pop1
; CHECK: i32.add $push3=, $1, $pop2
; CHECK: i32.mul $push4=, $pop[[L1]], $pop3
; CHECK: return $pop4
; NOREGS-LABEL: commute_to_fix_ordering:
; NOREGS: get_local 0{{$}}
; NOREGS: i32.call callee@FUNCTION
; NOREGS: tee_local 1
; NOREGS: get_local 1{{$}}
; NOREGS: get_local 0{{$}}
; NOREGS: i32.const 1
; NOREGS: i32.add
; NOREGS: i32.call callee@FUNCTION
; NOREGS: i32.add
; NOREGS: i32.mul
; NOREGS: return
define i32 @commute_to_fix_ordering(i32 %arg) {
  %tmp1 = call i32 @callee(i32 %arg)
  %tmp2 = add i32 %arg, 1
  %tmp3 = call i32 @callee(i32 %tmp2)
  %tmp5 = add i32 %tmp3, %tmp1
  %tmp6 = mul i32 %tmp5, %tmp1
  ret i32 %tmp6
}

; Stackify individual defs of virtual registers with multiple defs.
; CHECK-LABEL: multiple_defs:
; CHECK: f64.add $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}}
; CHECK-NEXT: tee_local $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
; CHECK-NEXT: f64.select $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
; CHECK: $[[NUM2]]=,
; NOREGS-LABEL: multiple_defs:
; NOREGS: f64.add
; NOREGS: tee_local
; NOREGS: f64.select
define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
bb:
  br label %bb5

bb5:                                              ; preds = %bb21, %bb
  %tmp = phi double [ 0.000000e+00, %bb ], [ %tmp22, %bb21 ]
  %tmp6 = phi double [ 0.000000e+00, %bb ], [ %tmp23, %bb21 ]
  %tmp7 = fcmp olt double %tmp6, 2.323450e+01
  br i1 %tmp7, label %bb8, label %bb21

bb8:                                              ; preds = %bb17, %bb5
  %tmp9 = phi double [ %tmp19, %bb17 ], [ %tmp, %bb5 ]
  %tmp10 = fadd double %tmp6, -1.000000e+00
  %tmp11 = select i1 %arg2, double -1.135357e+04, double %tmp10
  %tmp12 = fadd double %tmp11, %tmp9
  br i1 %arg3, label %bb17, label %bb13

bb13:                                             ; preds = %bb8
  %tmp14 = or i32 %arg1, 2
  %tmp15 = icmp eq i32 %tmp14, 14
  %tmp16 = select i1 %tmp15, double -1.135357e+04, double 0xBFCE147AE147B000
  br label %bb17

bb17:                                             ; preds = %bb13, %bb8
  %tmp18 = phi double [ %tmp16, %bb13 ], [ %tmp10, %bb8 ]
  %tmp19 = fadd double %tmp18, %tmp12
  %tmp20 = fcmp olt double %tmp6, 2.323450e+01
  br i1 %tmp20, label %bb8, label %bb21

bb21:                                             ; preds = %bb17, %bb5
  %tmp22 = phi double [ %tmp, %bb5 ], [ %tmp9, %bb17 ]
  %tmp23 = fadd double %tmp6, 1.000000e+00
  br i1 %arg4, label %exit, label %bb5
exit:
  ret void
}

; Don't move calls past loads.
; CHECK-LABEL: no_stackify_call_past_load:
; CHECK: i32.call $0=, red
; CHECK: i32.const $push0=, 0
; CHECK: i32.load $1=, count($pop0)
; NOREGS-LABEL: no_stackify_call_past_load:
; NOREGS: i32.call red
; NOREGS: i32.const 0
; NOREGS: i32.load count
@count = hidden global i32 0, align 4
define i32 @no_stackify_call_past_load() {
  %a = call i32 @red()
  %b = load i32, i32* @count, align 4
  ; Use of %a, placed after the load of @count.
  call i32 @callee(i32 %a)
  ret i32 %b
}

; Don't move stores past loads if there may be aliasing.
; CHECK-LABEL: no_stackify_store_past_load:
; CHECK: i32.store 0($1), $0
; CHECK: i32.load {{.*}}, 0($2)
; CHECK: i32.call {{.*}}, callee@FUNCTION, $0{{$}}
; NOREGS-LABEL: no_stackify_store_past_load:
; NOREGS: i32.store 0
; NOREGS: i32.load 0
; NOREGS: i32.call callee@FUNCTION{{$}}
define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
  store i32 %a, i32* %p1
  %b = load i32, i32* %p2, align 4
  call i32 @callee(i32 %a)
  ret i32 %b
}

; Can still stackify past invariant loads.
; CHECK-LABEL: store_past_invar_load:
; CHECK: i32.store 0($1), $0
; CHECK: i32.call {{.*}}, callee@FUNCTION, $0
; CHECK: i32.load $push{{.*}}, 0($2)
; CHECK: return $pop
; NOREGS-LABEL: store_past_invar_load:
; NOREGS: i32.store 0
; NOREGS: i32.call callee@FUNCTION
; NOREGS: i32.load 0
; NOREGS: return
define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* dereferenceable(4) %p2) {
  store i32 %a, i32* %p1
  %b = load i32, i32* %p2, !invariant.load !0
  call i32 @callee(i32 %a)
  ret i32 %b
}

; The llvm.dbg.value call must not produce any instruction: the function body
; is expected to be just 'unreachable'.

; CHECK-LABEL: ignore_dbg_value:
; CHECK-NEXT: .Lfunc_begin
; CHECK-NEXT: unreachable
; NOREGS-LABEL: ignore_dbg_value:
; NOREGS-NEXT: .Lfunc_begin
; NOREGS-NEXT: unreachable
declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
define void @ignore_dbg_value() {
  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10
  unreachable
}

; Don't stackify an expression that might use the stack into a return, since we
; might insert an epilogue before the return.
; CHECK-LABEL: no_stackify_past_epilogue:
; CHECK: return ${{[0-9]+}}{{$}}
; NOREGS-LABEL: no_stackify_past_epilogue:
; NOREGS: return{{$}}
declare i32 @use_memory(i32*)
define i32 @no_stackify_past_epilogue() {
  %x = alloca i32
  %call = call i32 @use_memory(i32* %x)
  ret i32 %call
}

; Stackify a loop induction variable into a loop comparison.

; CHECK-LABEL: stackify_indvar:
; CHECK: i32.const $push[[L5:.+]]=, 1{{$}}
; CHECK-NEXT: i32.add $push[[L4:.+]]=, $[[R0:.+]], $pop[[L5]]{{$}}
; CHECK-NEXT: tee_local $push[[L3:.+]]=, $[[R0]]=, $pop[[L4]]{{$}}
; CHECK-NEXT: i32.ne $push[[L2:.+]]=, $0, $pop[[L3]]{{$}}
; NOREGS-LABEL: stackify_indvar:
; NOREGS: i32.const 1{{$}}
; NOREGS-NEXT: i32.add
; NOREGS-NEXT: tee_local 2{{$}}
; NOREGS-NEXT: i32.ne
define void @stackify_indvar(i32 %tmp, i32* %v) #0 {
bb:
  br label %bb3

bb3:                                              ; preds = %bb3, %bb
  %tmp4 = phi i32 [ %tmp7, %bb3 ], [ 0, %bb ]
  %tmp5 = load volatile i32, i32* %v, align 4
  %tmp6 = add nsw i32 %tmp5, %tmp4
  store volatile i32 %tmp6, i32* %v, align 4
  %tmp7 = add nuw nsw i32 %tmp4, 1
  %tmp8 = icmp eq i32 %tmp7, %tmp
  br i1 %tmp8, label %bb10, label %bb3

bb10:                                             ; preds = %bb3
  ret void
}

; Don't stackify a call past a __stack_pointer store.
; CHECK-LABEL: stackpointer_dependency:
; CHECK: call {{.+}}, stackpointer_callee@FUNCTION,
; CHECK-NEXT: set_global __stack_pointer@GLOBAL,
; NOREGS-LABEL: stackpointer_dependency:
; NOREGS: call stackpointer_callee@FUNCTION
; NOREGS: set_global __stack_pointer
declare i32 @stackpointer_callee(i8* readnone, i8* readnone)
declare i8* @llvm.frameaddress(i32)
define i32 @stackpointer_dependency(i8* readnone) {
  %2 = tail call i8* @llvm.frameaddress(i32 0)
  %3 = tail call i32 @stackpointer_callee(i8* %0, i8* %2)
  ret i32 %3
}

; Stackify a call_indirect with respect to its ordering.

; CHECK-LABEL: call_indirect_stackify:
; CHECK: i32.load $push[[L4:.+]]=, 0($0)
; CHECK-NEXT: tee_local $push[[L3:.+]]=, $0=, $pop[[L4]]
; CHECK-NEXT: i32.load $push[[L0:.+]]=, 0($0)
; CHECK-NEXT: i32.load $push[[L1:.+]]=, 0($pop[[L0]])
; CHECK-NEXT: i32.call_indirect $push{{.+}}=, $pop[[L3]], $1, $pop[[L1]]
; NOREGS-LABEL: call_indirect_stackify:
; NOREGS: i32.load 0
; NOREGS-NEXT: tee_local 0
; NOREGS: i32.load 0
; NOREGS-NEXT: i32.load 0
; NOREGS-NEXT: i32.call_indirect
%class.call_indirect = type { i32 (...)** }
define i32 @call_indirect_stackify(%class.call_indirect** %objptr, i32 %arg) {
  %obj = load %class.call_indirect*, %class.call_indirect** %objptr
  %addr = bitcast %class.call_indirect* %obj to i32(%class.call_indirect*, i32)***
  %vtable = load i32(%class.call_indirect*, i32)**, i32(%class.call_indirect*, i32)*** %addr
  %vfn = getelementptr inbounds i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vtable, i32 0
  %f = load i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vfn
  %ret = call i32 %f(%class.call_indirect* %obj, i32 %arg)
  ret i32 %ret
}

!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}

!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 3.9.0 (trunk 266005) (llvm/trunk 266105)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
!2 = !DIFile(filename: "test.c", directory: "/")
!3 = !{}
!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !6, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !3)
!6 = !DISubroutineType(types: !3)
!7 = !DILocalVariable(name: "nzcnt", scope: !5, file: !2, line: 15, type: !8)
!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !DIExpression()
!10 = !DILocation(line: 15, column: 6, scope: !5)