1; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s --check-prefix=NOREGS
3
4; Test the register stackifier pass.
5
6; We have two sets of tests, one with registers and implicit locals, and
7; a stack / explicit locals based version (NOREGS).
8
; Module-wide target settings shared by every function below: a little-endian
; layout with 32-bit pointers, built for the wasm32 triple.
9target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
10target triple = "wasm32-unknown-unknown"
11
12; No because of pointer aliasing.
13
14; CHECK-LABEL: no0:
15; CHECK: return $1{{$}}
16; NOREGS-LABEL: no0:
17; NOREGS: return{{$}}
; @no0: loads through %q, stores 0 through %p, then returns the loaded value.
; Nothing in the signature says that %p and %q point to different memory.
18define i32 @no0(i32* %p, i32* %q) {
19  %t = load i32, i32* %q
20  store i32 0, i32* %p          ; sits between the load and its only use
21  ret i32 %t
22}
23
24; No because of side effects.
25
26; CHECK-LABEL: no1:
27; CHECK: return $1{{$}}
28; NOREGS-LABEL: no1:
29; NOREGS: return{{$}}
; @no1: same load / store / return shape as @no0, but both memory accesses are
; volatile. The load also carries !invariant.load and %q is dereferenceable(4).
30define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
31  %t = load volatile i32, i32* %q, !invariant.load !0
32  store volatile i32 0, i32* %p ; volatile store between the load and its use
33  ret i32 %t
34}
35
36; Yes because of invariant load and no side effects.
37
38; CHECK-LABEL: yes0:
39; CHECK: return $pop{{[0-9]+}}{{$}}
40; NOREGS-LABEL: yes0:
41; NOREGS: return{{$}}
; @yes0: non-volatile counterpart of @no1. The load is tagged !invariant.load
; and reads from a dereferenceable(4) pointer; the store through %p is plain.
42define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
43  %t = load i32, i32* %q, !invariant.load !0
44  store i32 0, i32* %p          ; sits between the load and its only use
45  ret i32 %t
46}
47
48; Yes because of no intervening side effects.
49
50; CHECK-LABEL: yes1:
51; CHECK: return $pop0{{$}}
52; NOREGS-LABEL: yes1:
53; NOREGS: return{{$}}
; @yes1: a single volatile load whose result is returned by the very next
; instruction, so nothing separates the definition from its use.
54define i32 @yes1(i32* %q) {
55  %t = load volatile i32, i32* %q
56  ret i32 %t
57}
58
59; Yes because undefined behavior can be sunk past a store.
60
61; CHECK-LABEL: sink_trap:
62; CHECK: return $pop{{[0-9]+}}{{$}}
63; NOREGS-LABEL: sink_trap:
64; NOREGS: return{{$}}
; @sink_trap: the signed division result is used only by the return, and a
; volatile store through %p sits between the two.
65define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
66  %t = sdiv i32 %x, %y
67  store volatile i32 0, i32* %p
68  ret i32 %t
69}
70
71; Yes because the call is readnone.
72
73; CHECK-LABEL: sink_readnone_call:
74; CHECK: return $pop{{[0-9]+}}{{$}}
75; NOREGS-LABEL: sink_readnone_call:
76; NOREGS: return{{$}}
; The return pattern above accepts any $pop index, like the ones for @yes0 and
; @sink_trap: what this test cares about is that the call result is returned
; from the value stack rather than from a numbered register. The index itself
; depends on how the backend numbers registers.
;
; External callee marked readnone and nounwind.
77declare i32 @readnone_callee() readnone nounwind
; @sink_readnone_call: the call result is used only by the return, and a
; volatile store through %p sits between the two. %x and %y are unused.
78define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
79  %t = call i32 @readnone_callee()
80  store volatile i32 0, i32* %p
81  ret i32 %t
82}
83
84; No because the call is readonly and there's an intervening store.
85
86; CHECK-LABEL: no_sink_readonly_call:
87; CHECK: return ${{[0-9]+}}{{$}}
88; NOREGS-LABEL: no_sink_readonly_call:
89; NOREGS: return{{$}}
; External callee marked readonly and nounwind.
90declare i32 @readonly_callee() readonly nounwind
; @no_sink_readonly_call: the call result is used only by the return, with a
; plain (non-volatile) store through %p in between. %x and %y are unused.
91define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
92  %t = call i32 @readonly_callee()
93  store i32 0, i32* %p
94  ret i32 %t
95}
96
97; Don't schedule stack uses into the stack. To reduce register pressure, the
98; scheduler might be tempted to move the definition of $2 down. However, this
99; would risk getting incorrect liveness if the instructions are later
100; rearranged to make the stack contiguous.
101
102; CHECK-LABEL: stack_uses:
103; CHECK: .functype stack_uses (i32, i32, i32, i32) -> (i32){{$}}
104; CHECK-NEXT: block   {{$}}
105; CHECK-NEXT: i32.const   $push[[L13:[0-9]+]]=, 1{{$}}
106; CHECK-NEXT: i32.lt_s    $push[[L0:[0-9]+]]=, $0, $pop[[L13]]{{$}}
107; CHECK-NEXT: i32.const   $push[[L1:[0-9]+]]=, 2{{$}}
108; CHECK-NEXT: i32.lt_s    $push[[L2:[0-9]+]]=, $1, $pop[[L1]]{{$}}
109; CHECK-NEXT: i32.xor     $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L2]]{{$}}
110; CHECK-NEXT: i32.const   $push[[L12:[0-9]+]]=, 1{{$}}
111; CHECK-NEXT: i32.lt_s    $push[[L3:[0-9]+]]=, $2, $pop[[L12]]{{$}}
112; CHECK-NEXT: i32.const   $push[[L11:[0-9]+]]=, 2{{$}}
113; CHECK-NEXT: i32.lt_s    $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}}
114; CHECK-NEXT: i32.xor     $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
115; CHECK-NEXT: i32.xor     $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}}
116; CHECK-NEXT: i32.const   $push10=, 1{{$}}
117; CHECK-NEXT: i32.ne      $push8=, $pop7, $pop10{{$}}
118; CHECK-NEXT: br_if       0, $pop8{{$}}
119; CHECK-NEXT: i32.const   $push9=, 0{{$}}
120; CHECK-NEXT: return      $pop9{{$}}
121; CHECK-NEXT: .LBB{{[0-9]+}}_2:
122; CHECK-NEXT: end_block{{$}}
123; CHECK-NEXT: i32.const   $push14=, 1{{$}}
124; CHECK-NEXT: return      $pop14{{$}}
125; NOREGS-LABEL: stack_uses:
126; NOREGS: .functype stack_uses (i32, i32, i32, i32) -> (i32){{$}}
127; NOREGS-NEXT: block {{$}}
128; NOREGS-NEXT: local.get 0{{$}}
129; NOREGS-NEXT: i32.const   1{{$}}
130; NOREGS-NEXT: i32.lt_s
131; NOREGS-NEXT: local.get 1{{$}}
132; NOREGS-NEXT: i32.const   2{{$}}
133; NOREGS-NEXT: i32.lt_s
134; NOREGS-NEXT: i32.xor {{$}}
135; NOREGS-NEXT: local.get 2{{$}}
136; NOREGS-NEXT: i32.const   1{{$}}
137; NOREGS-NEXT: i32.lt_s
138; NOREGS-NEXT: local.get 3{{$}}
139; NOREGS-NEXT: i32.const   2{{$}}
140; NOREGS-NEXT: i32.lt_s
141; NOREGS-NEXT: i32.xor {{$}}
142; NOREGS-NEXT: i32.xor {{$}}
143; NOREGS-NEXT: i32.const   1{{$}}
144; NOREGS-NEXT: i32.ne {{$}}
145; NOREGS-NEXT: br_if       0{{$}}
146; NOREGS-NEXT: i32.const   0{{$}}
147; NOREGS-NEXT: return{{$}}
148; NOREGS-NEXT: .LBB{{[0-9]+}}_2:
149; NOREGS-NEXT: end_block{{$}}
150; NOREGS-NEXT: i32.const   1{{$}}
151; NOREGS-NEXT: return{{$}}
; @stack_uses: four signed "less than or equal" comparisons (two against 0, two
; against 1) are folded together with three xors, and the combined i1 picks
; which constant is returned.
152define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
153entry:
154  %c = icmp sle i32 %x, 0
155  %d = icmp sle i32 %y, 1
156  %e = icmp sle i32 %z, 0
157  %f = icmp sle i32 %w, 1
158  %g = xor i1 %c, %d            ; combines the first two comparisons
159  %h = xor i1 %e, %f            ; combines the last two comparisons
160  %i = xor i1 %g, %h
161  br i1 %i, label %true, label %false
162true:
163  ret i32 0
164false:
165  ret i32 1
166}
167
168; Test an interesting case where the load has multiple uses and cannot
169; be trivially stackified. However, it can be stackified with a local.tee.
170
171; CHECK-LABEL: multiple_uses:
172; CHECK: .functype multiple_uses (i32, i32, i32) -> (){{$}}
173; CHECK-NEXT: block   {{$}}
174; CHECK-NEXT: i32.load    $push[[NUM0:[0-9]+]]=, 0($2){{$}}
175; CHECK-NEXT: local.tee   $push[[NUM1:[0-9]+]]=, $3=, $pop[[NUM0]]{{$}}
176; CHECK-NEXT: i32.ge_u    $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $1{{$}}
177; CHECK-NEXT: br_if       0, $pop[[NUM2]]{{$}}
178; CHECK-NEXT: i32.lt_u    $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
179; CHECK-NEXT: br_if       0, $pop[[NUM3]]{{$}}
180; CHECK-NEXT: i32.store   0($2), $3{{$}}
181; CHECK-NEXT: .LBB{{[0-9]+}}_3:
182; CHECK-NEXT: end_block{{$}}
183; CHECK-NEXT: return{{$}}
184; NOREGS-LABEL: multiple_uses:
185; NOREGS: .functype multiple_uses (i32, i32, i32) -> (){{$}}
186; NOREGS: .local i32{{$}}
187; NOREGS-NEXT: block {{$}}
188; NOREGS-NEXT: local.get   2{{$}}
189; NOREGS-NEXT: i32.load    0{{$}}
190; NOREGS-NEXT: local.tee   3{{$}}
191; NOREGS-NEXT: local.get   1{{$}}
192; NOREGS-NEXT: i32.ge_u
193; NOREGS-NEXT: br_if       0{{$}}
194; NOREGS-NEXT: local.get   3{{$}}
195; NOREGS-NEXT: local.get   0{{$}}
196; NOREGS-NEXT: i32.lt_u
197; NOREGS-NEXT: br_if       0{{$}}
198; NOREGS-NEXT: local.get   2{{$}}
199; NOREGS-NEXT: local.get   3{{$}}
200; NOREGS-NEXT: i32.store   0{{$}}
201; NOREGS-NEXT: .LBB{{[0-9]+}}_3:
202; NOREGS-NEXT: end_block{{$}}
203; NOREGS-NEXT: return{{$}}
; @multiple_uses: %tmp7 is loaded once and then used three times: as the
; pointer-typed operand of two unsigned comparisons (through %tmp8) and as the
; value stored back through %arg2 in %then.
204define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
205bb:
206  br label %loop
207
208loop:
209  %tmp7 = load i32, i32* %arg2
210  %tmp8 = inttoptr i32 %tmp7 to i32*
211  %tmp9 = icmp uge i32* %tmp8, %arg1
212  %tmp10 = icmp ult i32* %tmp8, %arg0
213  %tmp11 = or i1 %tmp9, %tmp10
214  br i1 %tmp11, label %back, label %then
215
216then:
217  store i32 %tmp7, i32* %arg2
218  br label %back
219
220back:
  ; The loop-back condition is undef, so the IR does not pin down whether the
  ; loop repeats.
221  br i1 undef, label %return, label %loop
222
223return:
224  ret void
225}
226
227; Don't stackify stores across other instructions with side effects.
228
229; CHECK-LABEL: stackify_store_across_side_effects:
230; CHECK:      store
231; CHECK:      call
232; CHECK:      store
233; CHECK-NEXT: call
234; NOREGS-LABEL: stackify_store_across_side_effects:
235; NOREGS:      store
236; NOREGS:      call
237; NOREGS:      store
238; NOREGS-NEXT: call
; The patterns above are anchored on the full function label, like every other
; test in this file, so they are matched only against this function's output.
;
; External function with no attributes, so it may have arbitrary side effects.
239declare void @evoke_side_effects()
; @stackify_store_across_side_effects: two identical store-then-call pairs in
; a row; each store of 2.0 through %d is followed by a call to the opaque
; function above.
240define hidden void @stackify_store_across_side_effects(double* nocapture %d) {
241entry:
242  store double 2.0, double* %d
243  call void @evoke_side_effects()
244  store double 2.0, double* %d
245  call void @evoke_side_effects()
246  ret void
247}
248
249; Div instructions have side effects and can't be reordered, but this entire
250; function should still be able to be stackified because it's already in
251; tree order.
252
253; CHECK-LABEL: div_tree:
254; CHECK: .functype div_tree (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32){{$}}
255; CHECK-NEXT: i32.div_s   $push[[L0:[0-9]+]]=, $0, $1{{$}}
256; CHECK-NEXT: i32.div_s   $push[[L1:[0-9]+]]=, $2, $3{{$}}
257; CHECK-NEXT: i32.div_s   $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
258; CHECK-NEXT: i32.div_s   $push[[L3:[0-9]+]]=, $4, $5{{$}}
259; CHECK-NEXT: i32.div_s   $push[[L4:[0-9]+]]=, $6, $7{{$}}
260; CHECK-NEXT: i32.div_s   $push[[L5:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
261; CHECK-NEXT: i32.div_s   $push[[L6:[0-9]+]]=, $pop[[L2]], $pop[[L5]]{{$}}
262; CHECK-NEXT: i32.div_s   $push[[L7:[0-9]+]]=, $8, $9{{$}}
263; CHECK-NEXT: i32.div_s   $push[[L8:[0-9]+]]=, $10, $11{{$}}
264; CHECK-NEXT: i32.div_s   $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
265; CHECK-NEXT: i32.div_s   $push[[L10:[0-9]+]]=, $12, $13{{$}}
266; CHECK-NEXT: i32.div_s   $push[[L11:[0-9]+]]=, $14, $15{{$}}
267; CHECK-NEXT: i32.div_s   $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
268; CHECK-NEXT: i32.div_s   $push[[L13:[0-9]+]]=, $pop[[L9]], $pop[[L12]]{{$}}
269; CHECK-NEXT: i32.div_s   $push[[L14:[0-9]+]]=, $pop[[L6]], $pop[[L13]]{{$}}
270; CHECK-NEXT: return      $pop[[L14]]{{$}}
271; NOREGS-LABEL: div_tree:
272; NOREGS: .functype div_tree (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32){{$}}
273; NOREGS-NEXT: local.get 0{{$}}
274; NOREGS-NEXT: local.get 1{{$}}
275; NOREGS-NEXT: i32.div_s{{$}}
276; NOREGS-NEXT: local.get 2{{$}}
277; NOREGS-NEXT: local.get 3{{$}}
278; NOREGS-NEXT: i32.div_s{{$}}
279; NOREGS-NEXT: i32.div_s{{$}}
280; NOREGS-NEXT: local.get 4{{$}}
281; NOREGS-NEXT: local.get 5{{$}}
282; NOREGS-NEXT: i32.div_s{{$}}
283; NOREGS-NEXT: local.get 6{{$}}
284; NOREGS-NEXT: local.get 7{{$}}
285; NOREGS-NEXT: i32.div_s{{$}}
286; NOREGS-NEXT: i32.div_s{{$}}
287; NOREGS-NEXT: i32.div_s{{$}}
288; NOREGS-NEXT: local.get 8{{$}}
289; NOREGS-NEXT: local.get 9{{$}}
290; NOREGS-NEXT: i32.div_s{{$}}
291; NOREGS-NEXT: local.get 10{{$}}
292; NOREGS-NEXT: local.get 11{{$}}
293; NOREGS-NEXT: i32.div_s{{$}}
294; NOREGS-NEXT: i32.div_s{{$}}
295; NOREGS-NEXT: local.get 12{{$}}
296; NOREGS-NEXT: local.get 13{{$}}
297; NOREGS-NEXT: i32.div_s{{$}}
298; NOREGS-NEXT: local.get 14{{$}}
299; NOREGS-NEXT: local.get 15{{$}}
300; NOREGS-NEXT: i32.div_s{{$}}
301; NOREGS-NEXT: i32.div_s{{$}}
302; NOREGS-NEXT: i32.div_s{{$}}
303; NOREGS-NEXT: i32.div_s{{$}}
304; NOREGS-NEXT: return{{$}}
; @div_tree: fifteen signed divisions arranged as a complete binary tree over
; the sixteen arguments. Each division appears right after both of its operands
; have been computed, i.e. the instructions are listed in post-order.
305define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
306entry:
307  %div = sdiv i32 %a, %b
308  %div1 = sdiv i32 %c, %d
309  %div2 = sdiv i32 %div, %div1
310  %div3 = sdiv i32 %e, %f
311  %div4 = sdiv i32 %g, %h
312  %div5 = sdiv i32 %div3, %div4
313  %div6 = sdiv i32 %div2, %div5     ; root of the subtree over %a..%h
314  %div7 = sdiv i32 %i, %j
315  %div8 = sdiv i32 %k, %l
316  %div9 = sdiv i32 %div7, %div8
317  %div10 = sdiv i32 %m, %n
318  %div11 = sdiv i32 %o, %p
319  %div12 = sdiv i32 %div10, %div11
320  %div13 = sdiv i32 %div9, %div12   ; root of the subtree over %i..%p
321  %div14 = sdiv i32 %div6, %div13   ; root of the whole tree
322  ret i32 %div14
323}
324
325; A simple multiple-use case.
326
327; CHECK-LABEL: simple_multiple_use:
328; CHECK:       .functype simple_multiple_use (i32, i32) -> (){{$}}
329; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
330; CHECK-NEXT:  local.tee   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
331; CHECK-NEXT:  call        use_a, $pop[[NUM1]]{{$}}
332; CHECK-NEXT:  call        use_b, $[[NUM2]]{{$}}
333; CHECK-NEXT:  return{{$}}
334; NOREGS-LABEL: simple_multiple_use:
335; NOREGS:       .functype simple_multiple_use (i32, i32) -> (){{$}}
336; NOREGS-NEXT:  local.get 1{{$}}
337; NOREGS-NEXT:  local.get 0{{$}}
338; NOREGS-NEXT:  i32.mul
339; NOREGS-NEXT:  local.tee   1{{$}}
340; NOREGS-NEXT:  call        use_a{{$}}
341; NOREGS-NEXT:  local.get   1{{$}}
342; NOREGS-NEXT:  call        use_b{{$}}
343; NOREGS-NEXT:  return{{$}}
; Two external consumers, each taking one i32.
344declare void @use_a(i32)
345declare void @use_b(i32)
; @simple_multiple_use: one product is computed and then passed to two
; separate calls, so the value has two uses in two different instructions.
346define void @simple_multiple_use(i32 %x, i32 %y) {
347  %mul = mul i32 %y, %x
348  call void @use_a(i32 %mul)
349  call void @use_b(i32 %mul)
350  ret void
351}
352
353; Multiple uses of the same value in one instruction.
354
355; CHECK-LABEL: multiple_uses_in_same_insn:
356; CHECK:       .functype multiple_uses_in_same_insn (i32, i32) -> (){{$}}
357; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
358; CHECK-NEXT:  local.tee   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
359; CHECK-NEXT:  call        use_2, $pop[[NUM1]], $[[NUM2]]{{$}}
360; CHECK-NEXT:  return{{$}}
361; NOREGS-LABEL: multiple_uses_in_same_insn:
362; NOREGS:       .functype multiple_uses_in_same_insn (i32, i32) -> (){{$}}
363; NOREGS-NEXT:  local.get 1{{$}}
364; NOREGS-NEXT:  local.get 0{{$}}
365; NOREGS-NEXT:  i32.mul
366; NOREGS-NEXT:  local.tee   1{{$}}
367; NOREGS-NEXT:  local.get   1{{$}}
368; NOREGS-NEXT:  call        use_2{{$}}
369; NOREGS-NEXT:  return{{$}}
; External consumer taking two i32 arguments.
370declare void @use_2(i32, i32)
; @multiple_uses_in_same_insn: one product is passed as both arguments of a
; single call, so the value has two uses within the same instruction.
371define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
372  %mul = mul i32 %y, %x
373  call void @use_2(i32 %mul, i32 %mul)
374  ret void
375}
376
377; Commute operands to achieve better stackifying.
378
379; CHECK-LABEL: commute:
380; CHECK:  .functype commute () -> (i32){{$}}
381; CHECK-NEXT:  call        $push0=, red{{$}}
382; CHECK-NEXT:  call        $push1=, green{{$}}
383; CHECK-NEXT:  i32.add     $push2=, $pop0, $pop1{{$}}
384; CHECK-NEXT:  call        $push3=, blue{{$}}
385; CHECK-NEXT:  i32.add     $push4=, $pop2, $pop3{{$}}
386; CHECK-NEXT:  return      $pop4{{$}}
387; NOREGS-LABEL: commute:
388; NOREGS:  .functype commute () -> (i32){{$}}
389; NOREGS-NEXT:  call        red{{$}}
390; NOREGS-NEXT:  call        green{{$}}
391; NOREGS-NEXT:  i32.add {{$}}
392; NOREGS-NEXT:  call        blue{{$}}
393; NOREGS-NEXT:  i32.add {{$}}
394; NOREGS-NEXT:  return{{$}}
; Three external value producers with no attributes.
395declare i32 @red()
396declare i32 @green()
397declare i32 @blue()
; @commute: sums the results of three calls. The first add lists its operands
; in the opposite order from the order in which the calls were made.
398define i32 @commute() {
399  %call = call i32 @red()
400  %call1 = call i32 @green()
401  %add = add i32 %call1, %call      ; operands reversed relative to call order
402  %call2 = call i32 @blue()
403  %add3 = add i32 %add, %call2
404  ret i32 %add3
405}
406
407; Don't stackify a register when it would move the def of the register past
408; an implicit local.get for the register.
409
410; CHECK-LABEL: no_stackify_past_use:
411; CHECK:      call            $1=, callee, $0
412; CHECK-NEXT: i32.const       $push0=, 1
413; CHECK-NEXT: i32.add         $push1=, $0, $pop0
414; CHECK-NEXT: call            $push2=, callee, $pop1
415; CHECK-NEXT: i32.sub         $push3=, $pop2, $1
416; CHECK-NEXT: i32.div_s       $push4=, $pop3, $1
417; CHECK-NEXT: return          $pop4
418; NOREGS-LABEL: no_stackify_past_use:
419; NOREGS:      local.get       0{{$}}
420; NOREGS-NEXT: call            callee
421; NOREGS-NEXT: local.set       1{{$}}
422; NOREGS-NEXT: local.get       0{{$}}
423; NOREGS-NEXT: i32.const       1
424; NOREGS-NEXT: i32.add
425; NOREGS-NEXT: call            callee
426; NOREGS-NEXT: local.get       1{{$}}
427; NOREGS-NEXT: i32.sub
428; NOREGS-NEXT: local.get       1{{$}}
429; NOREGS-NEXT: i32.div_s
430; NOREGS-NEXT: return
; External function taking and returning an i32; also called by later tests.
431declare i32 @callee(i32)
; @no_stackify_past_use: %tmp1 comes from the first call and is used twice
; after the second call, each time as the right-hand operand of a
; non-commutative operation (sub, then sdiv).
432define i32 @no_stackify_past_use(i32 %arg) {
433  %tmp1 = call i32 @callee(i32 %arg)
434  %tmp2 = add i32 %arg, 1
435  %tmp3 = call i32 @callee(i32 %tmp2)
436  %tmp5 = sub i32 %tmp3, %tmp1
437  %tmp6 = sdiv i32 %tmp5, %tmp1
438  ret i32 %tmp6
439}
440
441; This is the same as no_stackify_past_use, except using a commutative operator,
442; so we can reorder the operands and stackify.
443
444; CHECK-LABEL: commute_to_fix_ordering:
445; CHECK: call            $push[[L0:.+]]=, callee, $0
446; CHECK: local.tee       $push[[L1:.+]]=, $1=, $pop[[L0]]
447; CHECK: i32.const       $push0=, 1
448; CHECK: i32.add         $push1=, $0, $pop0
449; CHECK: call            $push2=, callee, $pop1
450; CHECK: i32.add         $push3=, $1, $pop2
451; CHECK: i32.mul         $push4=, $pop[[L1]], $pop3
452; CHECK: return          $pop4
453; NOREGS-LABEL: commute_to_fix_ordering:
454; NOREGS: local.get       0{{$}}
455; NOREGS: call            callee
456; NOREGS: local.tee       1
457; NOREGS: local.get       1{{$}}
458; NOREGS: local.get       0{{$}}
459; NOREGS: i32.const       1
460; NOREGS: i32.add
461; NOREGS: call            callee
462; NOREGS: i32.add
463; NOREGS: i32.mul
464; NOREGS: return
; @commute_to_fix_ordering: same call / add / call skeleton as
; @no_stackify_past_use, but the two operations that consume %tmp1 are add and
; mul, both of which allow their operands to be swapped.
465define i32 @commute_to_fix_ordering(i32 %arg) {
466  %tmp1 = call i32 @callee(i32 %arg)
467  %tmp2 = add i32 %arg, 1
468  %tmp3 = call i32 @callee(i32 %tmp2)
469  %tmp5 = add i32 %tmp3, %tmp1
470  %tmp6 = mul i32 %tmp5, %tmp1
471  ret i32 %tmp6
472}
473
474; Stackify individual defs of virtual registers with multiple defs.
475
476; CHECK-LABEL: multiple_defs:
477; CHECK:        f64.add         $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}}
478; CHECK-NEXT:   local.tee       $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
479; CHECK-NEXT:   f64.select      $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
480; CHECK:        $[[NUM2]]=,
481; NOREGS-LABEL: multiple_defs:
482; NOREGS:        f64.add
483; NOREGS:        local.tee
484; NOREGS:        f64.select
; @multiple_defs: two nested loops over doubles. %bb5 heads the outer loop and
; %bb8 heads the inner one; values flow between iterations through the phi
; nodes at the top of %bb5, %bb8, %bb17 and %bb21. %arg is unused.
485define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
486bb:
487  br label %bb5
488
489bb5:                                              ; preds = %bb21, %bb
490  %tmp = phi double [ 0.000000e+00, %bb ], [ %tmp22, %bb21 ]
491  %tmp6 = phi double [ 0.000000e+00, %bb ], [ %tmp23, %bb21 ]
492  %tmp7 = fcmp olt double %tmp6, 2.323450e+01
493  br i1 %tmp7, label %bb8, label %bb21
494
495bb8:                                              ; preds = %bb17, %bb5
496  %tmp9 = phi double [ %tmp19, %bb17 ], [ %tmp, %bb5 ]
497  %tmp10 = fadd double %tmp6, -1.000000e+00
498  %tmp11 = select i1 %arg2, double -1.135357e+04, double %tmp10
499  %tmp12 = fadd double %tmp11, %tmp9
500  br i1 %arg3, label %bb17, label %bb13
501
502bb13:                                             ; preds = %bb8
503  %tmp14 = or i32 %arg1, 2
504  %tmp15 = icmp eq i32 %tmp14, 14
505  %tmp16 = select i1 %tmp15, double -1.135357e+04, double 0xBFCE147AE147B000
506  br label %bb17
507
508bb17:                                             ; preds = %bb13, %bb8
509  %tmp18 = phi double [ %tmp16, %bb13 ], [ %tmp10, %bb8 ]
510  %tmp19 = fadd double %tmp18, %tmp12
  ; Same comparison as %tmp7; it decides whether the inner loop repeats.
511  %tmp20 = fcmp olt double %tmp6, 2.323450e+01
512  br i1 %tmp20, label %bb8, label %bb21
513
514bb21:                                             ; preds = %bb17, %bb5
515  %tmp22 = phi double [ %tmp, %bb5 ], [ %tmp9, %bb17 ]
516  %tmp23 = fadd double %tmp6, 1.000000e+00
517  br i1 %arg4, label %exit, label %bb5
518exit:
519  ret void
520}
521
522; Don't move calls past loads
523; CHECK-LABEL: no_stackify_call_past_load:
524; CHECK: call $0=, red
525; CHECK: i32.const $push0=, 0
526; CHECK: i32.load $1=, count($pop0)
527; NOREGS-LABEL: no_stackify_call_past_load:
528; NOREGS: call red
529; NOREGS: i32.const 0
530; NOREGS: i32.load count
; Global that @no_stackify_call_past_load reads between producing %a and
; consuming it.
531@count = hidden global i32 0, align 4
; @no_stackify_call_past_load: calls @red, loads @count, passes the first
; call's result to @callee, and returns the loaded value.
532define i32 @no_stackify_call_past_load() {
533  %a = call i32 @red()
534  %b = load i32, i32* @count, align 4
535  call i32 @callee(i32 %a)
536  ret i32 %b
537  ; The @callee call above is the only use of %a; %b is what gets returned.
538}
539
540; Don't move stores past loads if there may be aliasing
541; CHECK-LABEL: no_stackify_store_past_load:
542; CHECK: i32.store 0($1), $0
543; CHECK: i32.load {{.*}}, 0($2)
544; CHECK: call {{.*}}, callee, $0{{$}}
545; NOREGS-LABEL: no_stackify_store_past_load:
546; NOREGS: i32.store 0
547; NOREGS: i32.load 0
548; NOREGS: call callee{{$}}
; The label patterns above end in a colon, like the other tests in this file,
; so they anchor on the function's label line.
;
; @no_stackify_store_past_load: stores %a through %p1, loads through %p2, calls
; @callee with %a, and returns the loaded value. Nothing in the signature says
; that %p1 and %p2 point to different memory.
549define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
550  store i32 %a, i32* %p1
551  %b = load i32, i32* %p2, align 4
552  call i32 @callee(i32 %a)
553  ret i32 %b
554}
555
556; Can still stackify past invariant loads.
557; CHECK-LABEL: store_past_invar_load:
558; CHECK: i32.store 0($1), $0
559; CHECK: call {{.*}}, callee, $0
560; CHECK: i32.load $push{{.*}}, 0($2)
561; CHECK: return $pop
562; NOREGS-LABEL: store_past_invar_load:
563; NOREGS: i32.store 0
564; NOREGS: call callee
565; NOREGS: i32.load 0
566; NOREGS: return
; The label patterns above end in a colon, like the other tests in this file,
; so they anchor on the function's label line.
;
; @store_past_invar_load: same store / load / call / return shape as
; @no_stackify_store_past_load, except that the load is tagged !invariant.load
; and reads from a dereferenceable(4) pointer.
567define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* dereferenceable(4) %p2) {
568  store i32 %a, i32* %p1
569  %b = load i32, i32* %p2, !invariant.load !0
570  call i32 @callee(i32 %a)
571  ret i32 %b
572}
573
574; CHECK-LABEL: ignore_dbg_value:
575; CHECK:      .Lfunc_begin
576; CHECK:       unreachable
577; NOREGS-LABEL: ignore_dbg_value:
578; NOREGS:      .Lfunc_begin
579; NOREGS:       unreachable
; Debug-info intrinsic, declared here with an i64 second parameter.
580declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
; @ignore_dbg_value: the body consists solely of a debug-value intrinsic call
; followed by unreachable; the call references the debug metadata nodes !7, !9
; and !10 defined at the end of the file.
581define void @ignore_dbg_value() {
582  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10
583  unreachable
584}
585
586; Don't stackify an expression that might use the stack into a return, since we
587; might insert an epilogue before the return.
588
589; CHECK-LABEL: no_stackify_past_epilogue:
590; CHECK: return ${{[0-9]+}}{{$}}
591; NOREGS-LABEL: no_stackify_past_epilogue:
592; NOREGS: return{{$}}
; External function that receives a pointer to an i32.
593declare i32 @use_memory(i32*)
; @no_stackify_past_epilogue: allocates one i32 with alloca, hands its address
; to @use_memory, and returns that call's result directly.
594define i32 @no_stackify_past_epilogue() {
595  %x = alloca i32
596  %call = call i32 @use_memory(i32* %x)
597  ret i32 %call
598}
599
600; Stackify a loop induction variable into a loop comparison.
601
602; CHECK-LABEL: stackify_indvar:
603; CHECK:             i32.const   $push[[L5:.+]]=, 1{{$}}
604; CHECK-NEXT:        i32.add     $push[[L4:.+]]=, $[[R0:.+]], $pop[[L5]]{{$}}
605; CHECK-NEXT:        local.tee   $push[[L3:.+]]=, $[[R0]]=, $pop[[L4]]{{$}}
606; CHECK-NEXT:        i32.ne      $push[[L2:.+]]=, $0, $pop[[L3]]{{$}}
607; NOREGS-LABEL: stackify_indvar:
608; NOREGS:             i32.const   1{{$}}
609; NOREGS-NEXT:        i32.add
610; NOREGS-NEXT:        local.tee   2{{$}}
611; NOREGS-NEXT:        i32.ne
; @stackify_indvar: a single-block loop. %tmp4 counts up from 0 by 1 each
; iteration; the loop exits once the incremented counter equals %tmp. Each
; iteration also does a volatile read-modify-write of *%v, adding the counter.
; NOTE(review): attribute group #0 is referenced here but no definition of it
; is visible in this file - confirm that is intentional.
612define void @stackify_indvar(i32 %tmp, i32* %v) #0 {
613bb:
614  br label %bb3
615
616bb3:                                              ; preds = %bb3, %bb
617  %tmp4 = phi i32 [ %tmp7, %bb3 ], [ 0, %bb ]
618  %tmp5 = load volatile i32, i32* %v, align 4
619  %tmp6 = add nsw i32 %tmp5, %tmp4
620  store volatile i32 %tmp6, i32* %v, align 4
621  %tmp7 = add nuw nsw i32 %tmp4, 1
622  %tmp8 = icmp eq i32 %tmp7, %tmp
623  br i1 %tmp8, label %bb10, label %bb3
624
625bb10:                                             ; preds = %bb3
626  ret void
627}
628
629; Don't stackify a call past a __stack_pointer store.
630
631; CHECK-LABEL: stackpointer_dependency:
632; CHECK:      call {{.+}}, stackpointer_callee,
633; CHECK-NEXT: global.set __stack_pointer,
634; NOREGS-LABEL: stackpointer_dependency:
635; NOREGS:      call stackpointer_callee
636; NOREGS:      global.set __stack_pointer
; External callee marked readnone and nounwind, taking two readnone pointers.
637declare i32 @stackpointer_callee(i8* readnone, i8* readnone) nounwind readnone
638declare i8* @llvm.frameaddress(i32)
; @stackpointer_dependency: passes its own (unnamed, %0) argument together with
; the current frame address to the readnone callee and returns the result.
; Numbering starts at %2 because %0 is the argument and %1 the entry block.
639define i32 @stackpointer_dependency(i8* readnone) {
640  %2 = tail call i8* @llvm.frameaddress(i32 0)
641  %3 = tail call i32 @stackpointer_callee(i8* %0, i8* %2)
642  ret i32 %3
643}
644
645; Stackify a call_indirect with respect to its ordering
646
647; CHECK-LABEL: call_indirect_stackify:
648; CHECK: i32.load  $push[[L4:.+]]=, 0($0)
649; CHECK-NEXT: local.tee $push[[L3:.+]]=, $0=, $pop[[L4]]
650; CHECK-NEXT: i32.load  $push[[L0:.+]]=, 0($0)
651; CHECK-NEXT: i32.load  $push[[L1:.+]]=, 0($pop[[L0]])
652; CHECK-NEXT: call_indirect $push{{.+}}=, $pop[[L3]], $1, $pop[[L1]]
653; NOREGS-LABEL: call_indirect_stackify:
654; NOREGS: i32.load  0
655; NOREGS-NEXT: local.tee 0
656; NOREGS:      i32.load  0
657; NOREGS-NEXT: i32.load  0
658; NOREGS-NEXT: call_indirect (i32, i32) -> (i32)
; Object type whose only field is a pointer to a table of function pointers.
659%class.call_indirect = type { i32 (...)** }
; @call_indirect_stackify: a chain of three dependent loads (object pointer,
; then its first field, then entry 0 of that table) produces a function pointer
; that is called with the object and %arg. %obj is used twice: as the base of
; the second load and as the first call argument.
660define i32 @call_indirect_stackify(%class.call_indirect** %objptr, i32 %arg) {
661  %obj = load %class.call_indirect*, %class.call_indirect** %objptr
662  %addr = bitcast %class.call_indirect* %obj to i32(%class.call_indirect*, i32)***
663  %vtable = load i32(%class.call_indirect*, i32)**, i32(%class.call_indirect*, i32)*** %addr
664  %vfn = getelementptr inbounds i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vtable, i32 0
665  %f = load i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vfn
666  %ret = call i32 %f(%class.call_indirect* %obj, i32 %arg)
667  ret i32 %ret
668}
669
; Module-level metadata. !0 serves two purposes in this file: it is the sole
; module flag, and it is the node attached wherever a load above is tagged
; !invariant.load. !1 through !10 are debug-info nodes; !7, !9 and !10 are the
; ones referenced by the llvm.dbg.value call in @ignore_dbg_value.
670!llvm.module.flags = !{!0}
671!llvm.dbg.cu = !{!1}
672
673!0 = !{i32 2, !"Debug Info Version", i32 3}
674!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 3.9.0 (trunk 266005) (llvm/trunk 266105)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
675!2 = !DIFile(filename: "test.c", directory: "/")
676!3 = !{}
677!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !6, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !3)
678!6 = !DISubroutineType(types: !3)
679!7 = !DILocalVariable(name: "nzcnt", scope: !5, file: !2, line: 15, type: !8)
680!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
681!9 = !DIExpression()
682!10 = !DILocation(line: 15, column: 6, scope: !5)
683