1; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s
2; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s --check-prefix=NOREGS
3
4; Test the register stackifier pass.
5
6; We have two sets of tests, one with registers and implicit locals, and
7; a stack / explicit locals based version (NOREGS).
8
; Module-level target settings: a wasm32 triple, with a data layout string that
; declares 32-bit pointers (p:32:32).
9target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
10target triple = "wasm32-unknown-unknown"
11
12; No because of pointer aliasing.
13
14; CHECK-LABEL: no0:
15; CHECK: return $1{{$}}
16; NOREGS-LABEL: no0:
17; NOREGS: return{{$}}
; Loads through %q, then stores 0 through %p, then returns the loaded value.
; The store sits between the load and its only use, and nothing in the
; signature says %p and %q are distinct.
18define i32 @no0(i32* %p, i32* %q) {
19  %t = load i32, i32* %q
20  store i32 0, i32* %p
21  ret i32 %t
22}
23
24; No because of side effects.
25
26; CHECK-LABEL: no1:
27; CHECK: return $1{{$}}
28; NOREGS-LABEL: no1:
29; NOREGS: return{{$}}
; Load / store / return sequence in which both memory accesses are volatile.
; The load is additionally tagged !invariant.load and %q is dereferenceable(4),
; but the volatile qualifiers are what this case exercises.
30define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
31  %t = load volatile i32, i32* %q, !invariant.load !0
32  store volatile i32 0, i32* %p
33  ret i32 %t
34}
35
36; Yes because of invariant load and no side effects.
37
38; CHECK-LABEL: yes0:
39; CHECK: return $pop{{[0-9]+}}{{$}}
40; NOREGS-LABEL: yes0:
41; NOREGS: return{{$}}
; Non-volatile load from dereferenceable(4) %q tagged !invariant.load, followed
; by a non-volatile store through %p; the loaded value is returned afterwards.
42define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
43  %t = load i32, i32* %q, !invariant.load !0
44  store i32 0, i32* %p
45  ret i32 %t
46}
47
48; Yes because of no intervening side effects.
49
50; CHECK-LABEL: yes1:
51; CHECK: return $pop0{{$}}
52; NOREGS-LABEL: yes1:
53; NOREGS: return{{$}}
; A volatile load whose result is returned immediately; no instruction sits
; between the load and its use.
54define i32 @yes1(i32* %q) {
55  %t = load volatile i32, i32* %q
56  ret i32 %t
57}
58
59; Yes because undefined behavior can be sunk past a store.
60
61; CHECK-LABEL: sink_trap:
62; CHECK: return $pop{{[0-9]+}}{{$}}
63; NOREGS-LABEL: sink_trap:
64; NOREGS: return{{$}}
; Computes %x sdiv %y before a volatile store through %p and returns the
; quotient after the store.
65define i32 @sink_trap(i32 %x, i32 %y, i32* %p) {
66  %t = sdiv i32 %x, %y
67  store volatile i32 0, i32* %p
68  ret i32 %t
69}
70
71; Yes because the call is readnone.
72
73; CHECK-LABEL: sink_readnone_call:
74; CHECK: return $pop0{{$}}
75; NOREGS-LABEL: sink_readnone_call:
76; NOREGS: return{{$}}
; The callee is declared readnone and nounwind. Its result is produced before
; a volatile store through %p and returned after it; %x and %y are unused.
77declare i32 @readnone_callee() readnone nounwind
78define i32 @sink_readnone_call(i32 %x, i32 %y, i32* %p) {
79  %t = call i32 @readnone_callee()
80  store volatile i32 0, i32* %p
81  ret i32 %t
82}
83
84; No because the call is readonly and there's an intervening store.
85
86; CHECK-LABEL: no_sink_readonly_call:
87; CHECK: return ${{[0-9]+}}{{$}}
88; NOREGS-LABEL: no_sink_readonly_call:
89; NOREGS: return{{$}}
; The callee is declared readonly (not readnone) and nounwind. A non-volatile
; store through %p sits between the call and the return of its result; %x and
; %y are unused.
90declare i32 @readonly_callee() readonly nounwind
91define i32 @no_sink_readonly_call(i32 %x, i32 %y, i32* %p) {
92  %t = call i32 @readonly_callee()
93  store i32 0, i32* %p
94  ret i32 %t
95}
96
97; Don't schedule stack uses into the stack. To reduce register pressure, the
98; scheduler might be tempted to move the definition of $2 down. However, this
99; would risk getting incorrect liveness if the instructions are later
100; rearranged to make the stack contiguous.
101
102; CHECK-LABEL: stack_uses:
103; CHECK: .param i32, i32, i32, i32{{$}}
104; CHECK-NEXT: .result i32{{$}}
105; CHECK-NEXT: block   {{$}}
106; CHECK-NEXT: i32.const   $push[[L13:[0-9]+]]=, 1{{$}}
107; CHECK-NEXT: i32.lt_s    $push[[L0:[0-9]+]]=, $0, $pop[[L13]]{{$}}
108; CHECK-NEXT: i32.const   $push[[L1:[0-9]+]]=, 2{{$}}
109; CHECK-NEXT: i32.lt_s    $push[[L2:[0-9]+]]=, $1, $pop[[L1]]{{$}}
110; CHECK-NEXT: i32.xor     $push[[L5:[0-9]+]]=, $pop[[L0]], $pop[[L2]]{{$}}
111; CHECK-NEXT: i32.const   $push[[L12:[0-9]+]]=, 1{{$}}
112; CHECK-NEXT: i32.lt_s    $push[[L3:[0-9]+]]=, $2, $pop[[L12]]{{$}}
113; CHECK-NEXT: i32.const   $push[[L11:[0-9]+]]=, 2{{$}}
114; CHECK-NEXT: i32.lt_s    $push[[L4:[0-9]+]]=, $3, $pop[[L11]]{{$}}
115; CHECK-NEXT: i32.xor     $push[[L6:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
116; CHECK-NEXT: i32.xor     $push[[L7:[0-9]+]]=, $pop[[L5]], $pop[[L6]]{{$}}
117; CHECK-NEXT: i32.const   $push10=, 1{{$}}
118; CHECK-NEXT: i32.ne      $push8=, $pop7, $pop10{{$}}
119; CHECK-NEXT: br_if       0, $pop8{{$}}
120; CHECK-NEXT: i32.const   $push9=, 0{{$}}
121; CHECK-NEXT: return      $pop9{{$}}
122; CHECK-NEXT: .LBB7_2:
123; CHECK-NEXT: end_block{{$}}
124; CHECK-NEXT: i32.const   $push14=, 1{{$}}
125; CHECK-NEXT: return      $pop14{{$}}
126; NOREGS-LABEL: stack_uses:
127; NOREGS: .param i32, i32, i32, i32{{$}}
128; NOREGS-NEXT: .result i32{{$}}
129; NOREGS-NEXT: block {{$}}
130; NOREGS-NEXT: get_local 0{{$}}
131; NOREGS-NEXT: i32.const   1{{$}}
132; NOREGS-NEXT: i32.lt_s
133; NOREGS-NEXT: get_local 1{{$}}
134; NOREGS-NEXT: i32.const   2{{$}}
135; NOREGS-NEXT: i32.lt_s
136; NOREGS-NEXT: i32.xor {{$}}
137; NOREGS-NEXT: get_local 2{{$}}
138; NOREGS-NEXT: i32.const   1{{$}}
139; NOREGS-NEXT: i32.lt_s
140; NOREGS-NEXT: get_local 3{{$}}
141; NOREGS-NEXT: i32.const   2{{$}}
142; NOREGS-NEXT: i32.lt_s
143; NOREGS-NEXT: i32.xor {{$}}
144; NOREGS-NEXT: i32.xor {{$}}
145; NOREGS-NEXT: i32.const   1{{$}}
146; NOREGS-NEXT: i32.ne {{$}}
147; NOREGS-NEXT: br_if       0{{$}}
148; NOREGS-NEXT: i32.const   0{{$}}
149; NOREGS-NEXT: return{{$}}
150; NOREGS-NEXT: .LBB7_2:
151; NOREGS-NEXT: end_block{{$}}
152; NOREGS-NEXT: i32.const   1{{$}}
153; NOREGS-NEXT: return{{$}}
; Four signed less-or-equal comparisons (against 0, 1, 0, 1) are combined
; pairwise with xor, and the two results are xor'ed again to form the branch
; condition. The %true block returns 0 and the %false block returns 1.
154define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
155entry:
156  %c = icmp sle i32 %x, 0
157  %d = icmp sle i32 %y, 1
158  %e = icmp sle i32 %z, 0
159  %f = icmp sle i32 %w, 1
160  %g = xor i1 %c, %d
161  %h = xor i1 %e, %f
162  %i = xor i1 %g, %h
163  br i1 %i, label %true, label %false
164true:
165  ret i32 0
166false:
167  ret i32 1
168}
169
170; Test an interesting case where the load has multiple uses and cannot
171; be trivially stackified. However, it can be stackified with a tee_local.
172
173; CHECK-LABEL: multiple_uses:
174; CHECK: .param       i32, i32, i32{{$}}
175; CHECK-NEXT: block   {{$}}
176; CHECK-NEXT: i32.load    $push[[NUM0:[0-9]+]]=, 0($2){{$}}
177; CHECK-NEXT: tee_local   $push[[NUM1:[0-9]+]]=, $3=, $pop[[NUM0]]{{$}}
178; CHECK-NEXT: i32.ge_u    $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $1{{$}}
179; CHECK-NEXT: br_if       0, $pop[[NUM2]]{{$}}
180; CHECK-NEXT: i32.lt_u    $push[[NUM3:[0-9]+]]=, $3, $0{{$}}
181; CHECK-NEXT: br_if       0, $pop[[NUM3]]{{$}}
182; CHECK-NEXT: i32.store   0($2), $3{{$}}
183; CHECK-NEXT: .LBB8_3:
184; CHECK-NEXT: end_block{{$}}
185; CHECK-NEXT: return{{$}}
186; NOREGS-LABEL: multiple_uses:
187; NOREGS: .param       i32, i32, i32{{$}}
188; NOREGS: .local i32{{$}}
189; NOREGS-NEXT: block {{$}}
190; NOREGS-NEXT: get_local   2{{$}}
191; NOREGS-NEXT: i32.load    0{{$}}
192; NOREGS-NEXT: tee_local   3{{$}}
193; NOREGS-NEXT: get_local   1{{$}}
194; NOREGS-NEXT: i32.ge_u
195; NOREGS-NEXT: br_if       0{{$}}
196; NOREGS-NEXT: get_local   3{{$}}
197; NOREGS-NEXT: get_local   0{{$}}
198; NOREGS-NEXT: i32.lt_u
199; NOREGS-NEXT: br_if       0{{$}}
200; NOREGS-NEXT: get_local   2{{$}}
201; NOREGS-NEXT: get_local   3{{$}}
202; NOREGS-NEXT: i32.store   0{{$}}
203; NOREGS-NEXT: .LBB8_3:
204; NOREGS-NEXT: end_block{{$}}
205; NOREGS-NEXT: return{{$}}
; The value loaded from %arg2 (%tmp7) has several uses: it is converted to a
; pointer that feeds two unsigned comparisons against %arg1 and %arg0, and it
; is stored back through %arg2 in %then when neither comparison holds.
206define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
207bb:
208  br label %loop
209
210loop:
211  %tmp7 = load i32, i32* %arg2
212  %tmp8 = inttoptr i32 %tmp7 to i32*
213  %tmp9 = icmp uge i32* %tmp8, %arg1
214  %tmp10 = icmp ult i32* %tmp8, %arg0
215  %tmp11 = or i1 %tmp9, %tmp10
216  br i1 %tmp11, label %back, label %then
217
218then:
219  store i32 %tmp7, i32* %arg2
220  br label %back
221
222back:
  ; The loop exit condition is undef.
223  br i1 undef, label %return, label %loop
224
225return:
226  ret void
227}
228
229; Don't stackify stores across other instructions with side effects.
230
231; CHECK:      side_effects:
232; CHECK:      store
233; CHECK-NEXT: call
234; CHECK:      store
235; CHECK-NEXT: call
236; NOREGS:      side_effects:
237; NOREGS:      store
238; NOREGS-NEXT: call
239; NOREGS:      store
240; NOREGS-NEXT: call
; Two identical pairs of "store 2.0 through %d, then call an external function
; with no attributes". The expected output requires each store to be followed
; directly by its call.
241declare void @evoke_side_effects()
242define hidden void @stackify_store_across_side_effects(double* nocapture %d) {
243entry:
244  store double 2.0, double* %d
245  call void @evoke_side_effects()
246  store double 2.0, double* %d
247  call void @evoke_side_effects()
248  ret void
249}
250
251; Div instructions have side effects and can't be reordered, but this entire
252; function should still be able to be stackified because it's already in
253; tree order.
254
255; CHECK-LABEL: div_tree:
256; CHECK: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
257; CHECK-NEXT: .result     i32{{$}}
258; CHECK-NEXT: i32.div_s   $push[[L0:[0-9]+]]=, $0, $1{{$}}
259; CHECK-NEXT: i32.div_s   $push[[L1:[0-9]+]]=, $2, $3{{$}}
260; CHECK-NEXT: i32.div_s   $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}}
261; CHECK-NEXT: i32.div_s   $push[[L3:[0-9]+]]=, $4, $5{{$}}
262; CHECK-NEXT: i32.div_s   $push[[L4:[0-9]+]]=, $6, $7{{$}}
263; CHECK-NEXT: i32.div_s   $push[[L5:[0-9]+]]=, $pop[[L3]], $pop[[L4]]{{$}}
264; CHECK-NEXT: i32.div_s   $push[[L6:[0-9]+]]=, $pop[[L2]], $pop[[L5]]{{$}}
265; CHECK-NEXT: i32.div_s   $push[[L7:[0-9]+]]=, $8, $9{{$}}
266; CHECK-NEXT: i32.div_s   $push[[L8:[0-9]+]]=, $10, $11{{$}}
267; CHECK-NEXT: i32.div_s   $push[[L9:[0-9]+]]=, $pop[[L7]], $pop[[L8]]{{$}}
268; CHECK-NEXT: i32.div_s   $push[[L10:[0-9]+]]=, $12, $13{{$}}
269; CHECK-NEXT: i32.div_s   $push[[L11:[0-9]+]]=, $14, $15{{$}}
270; CHECK-NEXT: i32.div_s   $push[[L12:[0-9]+]]=, $pop[[L10]], $pop[[L11]]{{$}}
271; CHECK-NEXT: i32.div_s   $push[[L13:[0-9]+]]=, $pop[[L9]], $pop[[L12]]{{$}}
272; CHECK-NEXT: i32.div_s   $push[[L14:[0-9]+]]=, $pop[[L6]], $pop[[L13]]{{$}}
273; CHECK-NEXT: return      $pop[[L14]]{{$}}
274; NOREGS-LABEL: div_tree:
275; NOREGS: .param i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32{{$}}
276; NOREGS-NEXT: .result     i32{{$}}
277; NOREGS-NEXT: get_local 0{{$}}
278; NOREGS-NEXT: get_local 1{{$}}
279; NOREGS-NEXT: i32.div_s{{$}}
280; NOREGS-NEXT: get_local 2{{$}}
281; NOREGS-NEXT: get_local 3{{$}}
282; NOREGS-NEXT: i32.div_s{{$}}
283; NOREGS-NEXT: i32.div_s{{$}}
284; NOREGS-NEXT: get_local 4{{$}}
285; NOREGS-NEXT: get_local 5{{$}}
286; NOREGS-NEXT: i32.div_s{{$}}
287; NOREGS-NEXT: get_local 6{{$}}
288; NOREGS-NEXT: get_local 7{{$}}
289; NOREGS-NEXT: i32.div_s{{$}}
290; NOREGS-NEXT: i32.div_s{{$}}
291; NOREGS-NEXT: i32.div_s{{$}}
292; NOREGS-NEXT: get_local 8{{$}}
293; NOREGS-NEXT: get_local 9{{$}}
294; NOREGS-NEXT: i32.div_s{{$}}
295; NOREGS-NEXT: get_local 10{{$}}
296; NOREGS-NEXT: get_local 11{{$}}
297; NOREGS-NEXT: i32.div_s{{$}}
298; NOREGS-NEXT: i32.div_s{{$}}
299; NOREGS-NEXT: get_local 12{{$}}
300; NOREGS-NEXT: get_local 13{{$}}
301; NOREGS-NEXT: i32.div_s{{$}}
302; NOREGS-NEXT: get_local 14{{$}}
303; NOREGS-NEXT: get_local 15{{$}}
304; NOREGS-NEXT: i32.div_s{{$}}
305; NOREGS-NEXT: i32.div_s{{$}}
306; NOREGS-NEXT: i32.div_s{{$}}
307; NOREGS-NEXT: i32.div_s{{$}}
308; NOREGS-NEXT: return{{$}}
; Fifteen sdiv instructions form a balanced binary tree over the sixteen
; arguments. The instructions are listed in post-order: each sdiv appears
; after both of its operand subtrees, left subtree first.
309define i32 @div_tree(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) {
310entry:
311  %div = sdiv i32 %a, %b
312  %div1 = sdiv i32 %c, %d
313  %div2 = sdiv i32 %div, %div1
314  %div3 = sdiv i32 %e, %f
315  %div4 = sdiv i32 %g, %h
316  %div5 = sdiv i32 %div3, %div4
317  %div6 = sdiv i32 %div2, %div5
318  %div7 = sdiv i32 %i, %j
319  %div8 = sdiv i32 %k, %l
320  %div9 = sdiv i32 %div7, %div8
321  %div10 = sdiv i32 %m, %n
322  %div11 = sdiv i32 %o, %p
323  %div12 = sdiv i32 %div10, %div11
324  %div13 = sdiv i32 %div9, %div12
325  %div14 = sdiv i32 %div6, %div13
326  ret i32 %div14
327}
328
329; A simple multiple-use case.
330
331; CHECK-LABEL: simple_multiple_use:
332; CHECK:  .param      i32, i32{{$}}
333; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
334; CHECK-NEXT:  tee_local   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
335; CHECK-NEXT:  call        use_a@FUNCTION, $pop[[NUM1]]{{$}}
336; CHECK-NEXT:  call        use_b@FUNCTION, $[[NUM2]]{{$}}
337; CHECK-NEXT:  return{{$}}
338; NOREGS-LABEL: simple_multiple_use:
339; NOREGS:  .param      i32, i32{{$}}
340; NOREGS-NEXT:  get_local 1{{$}}
341; NOREGS-NEXT:  get_local 0{{$}}
342; NOREGS-NEXT:  i32.mul
343; NOREGS-NEXT:  tee_local   1{{$}}
344; NOREGS-NEXT:  call        use_a@FUNCTION{{$}}
345; NOREGS-NEXT:  get_local   1{{$}}
346; NOREGS-NEXT:  call        use_b@FUNCTION{{$}}
347; NOREGS-NEXT:  return{{$}}
; %mul is computed once and passed to two separate calls, first @use_a and
; then @use_b.
348declare void @use_a(i32)
349declare void @use_b(i32)
350define void @simple_multiple_use(i32 %x, i32 %y) {
351  %mul = mul i32 %y, %x
352  call void @use_a(i32 %mul)
353  call void @use_b(i32 %mul)
354  ret void
355}
356
357; Multiple uses of the same value in one instruction.
358
359; CHECK-LABEL: multiple_uses_in_same_insn:
360; CHECK:  .param      i32, i32{{$}}
361; CHECK-NEXT:  i32.mul     $push[[NUM0:[0-9]+]]=, $1, $0{{$}}
362; CHECK-NEXT:  tee_local   $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
363; CHECK-NEXT:  call        use_2@FUNCTION, $pop[[NUM1]], $[[NUM2]]{{$}}
364; CHECK-NEXT:  return{{$}}
365; NOREGS-LABEL: multiple_uses_in_same_insn:
366; NOREGS:  .param      i32, i32{{$}}
367; NOREGS-NEXT:  get_local 1{{$}}
368; NOREGS-NEXT:  get_local 0{{$}}
369; NOREGS-NEXT:  i32.mul
370; NOREGS-NEXT:  tee_local   1{{$}}
371; NOREGS-NEXT:  get_local   1{{$}}
372; NOREGS-NEXT:  call        use_2@FUNCTION{{$}}
373; NOREGS-NEXT:  return{{$}}
; %mul is computed once and passed as both arguments of a single call.
374declare void @use_2(i32, i32)
375define void @multiple_uses_in_same_insn(i32 %x, i32 %y) {
376  %mul = mul i32 %y, %x
377  call void @use_2(i32 %mul, i32 %mul)
378  ret void
379}
380
381; Commute operands to achieve better stackifying.
382
383; CHECK-LABEL: commute:
384; CHECK-NOT: param
385; CHECK:  .result     i32{{$}}
386; CHECK-NEXT:  i32.call    $push0=, red@FUNCTION{{$}}
387; CHECK-NEXT:  i32.call    $push1=, green@FUNCTION{{$}}
388; CHECK-NEXT:  i32.add     $push2=, $pop0, $pop1{{$}}
389; CHECK-NEXT:  i32.call    $push3=, blue@FUNCTION{{$}}
390; CHECK-NEXT:  i32.add     $push4=, $pop2, $pop3{{$}}
391; CHECK-NEXT:  return      $pop4{{$}}
392; NOREGS-LABEL: commute:
393; NOREGS-NOT: param
394; NOREGS:  .result     i32{{$}}
395; NOREGS-NEXT:  i32.call    red@FUNCTION{{$}}
396; NOREGS-NEXT:  i32.call    green@FUNCTION{{$}}
397; NOREGS-NEXT:  i32.add {{$}}
398; NOREGS-NEXT:  i32.call    blue@FUNCTION{{$}}
399; NOREGS-NEXT:  i32.add {{$}}
400; NOREGS-NEXT:  return{{$}}
; Calls @red, then @green, then adds the results with the @green result as the
; first operand, i.e. in the opposite order to the calls. The @blue result is
; then added to that sum.
401declare i32 @red()
402declare i32 @green()
403declare i32 @blue()
404define i32 @commute() {
405  %call = call i32 @red()
406  %call1 = call i32 @green()
  ; Operand order here is (green, red); the expected output pops (red, green).
407  %add = add i32 %call1, %call
408  %call2 = call i32 @blue()
409  %add3 = add i32 %add, %call2
410  ret i32 %add3
411}
412
413; Don't stackify a register when it would move the def of the register past
414; an implicit get_local for the register.
415
416; CHECK-LABEL: no_stackify_past_use:
417; CHECK:      i32.call        $1=, callee@FUNCTION, $0
418; CHECK-NEXT: i32.const       $push0=, 1
419; CHECK-NEXT: i32.add         $push1=, $0, $pop0
420; CHECK-NEXT: i32.call        $push2=, callee@FUNCTION, $pop1
421; CHECK-NEXT: i32.sub         $push3=, $pop2, $1
422; CHECK-NEXT: i32.div_s       $push4=, $pop3, $1
423; CHECK-NEXT: return          $pop4
424; NOREGS-LABEL: no_stackify_past_use:
425; NOREGS:      get_local       0{{$}}
426; NOREGS-NEXT: i32.call        callee@FUNCTION
427; NOREGS-NEXT: set_local       1{{$}}
428; NOREGS-NEXT: get_local       0{{$}}
429; NOREGS-NEXT: i32.const       1
430; NOREGS-NEXT: i32.add
431; NOREGS-NEXT: i32.call        callee@FUNCTION
432; NOREGS-NEXT: get_local       1{{$}}
433; NOREGS-NEXT: i32.sub
434; NOREGS-NEXT: get_local       1{{$}}
435; NOREGS-NEXT: i32.div_s
436; NOREGS-NEXT: return
; %tmp1 is defined by the first call and then used twice after the second
; call, each time as the second operand of a non-commutative operation (sub,
; then sdiv).
437declare i32 @callee(i32)
438define i32 @no_stackify_past_use(i32 %arg) {
439  %tmp1 = call i32 @callee(i32 %arg)
440  %tmp2 = add i32 %arg, 1
441  %tmp3 = call i32 @callee(i32 %tmp2)
442  %tmp5 = sub i32 %tmp3, %tmp1
443  %tmp6 = sdiv i32 %tmp5, %tmp1
444  ret i32 %tmp6
445}
446
447; This is the same as no_stackify_past_use, except using a commutative operator,
448; so we can reorder the operands and stackify.
449
450; CHECK-LABEL: commute_to_fix_ordering:
451; CHECK: i32.call        $push[[L0:.+]]=, callee@FUNCTION, $0
452; CHECK: tee_local       $push[[L1:.+]]=, $1=, $pop[[L0]]
453; CHECK: i32.const       $push0=, 1
454; CHECK: i32.add         $push1=, $0, $pop0
455; CHECK: i32.call        $push2=, callee@FUNCTION, $pop1
456; CHECK: i32.add         $push3=, $1, $pop2
457; CHECK: i32.mul         $push4=, $pop[[L1]], $pop3
458; CHECK: return          $pop4
459; NOREGS-LABEL: commute_to_fix_ordering:
460; NOREGS: get_local       0{{$}}
461; NOREGS: i32.call        callee@FUNCTION
462; NOREGS: tee_local       1
463; NOREGS: get_local       1{{$}}
464; NOREGS: get_local       0{{$}}
465; NOREGS: i32.const       1
466; NOREGS: i32.add
467; NOREGS: i32.call        callee@FUNCTION
468; NOREGS: i32.add
469; NOREGS: i32.mul
470; NOREGS: return
; %tmp1 is defined by the first call and used twice after the second call,
; here as the second operand of an add and then of a mul, both of which are
; commutative.
471define i32 @commute_to_fix_ordering(i32 %arg) {
472  %tmp1 = call i32 @callee(i32 %arg)
473  %tmp2 = add i32 %arg, 1
474  %tmp3 = call i32 @callee(i32 %tmp2)
475  %tmp5 = add i32 %tmp3, %tmp1
476  %tmp6 = mul i32 %tmp5, %tmp1
477  ret i32 %tmp6
478}
479
480; Stackify individual defs of virtual registers with multiple defs.
481
482; CHECK-LABEL: multiple_defs:
483; CHECK:        f64.add         $push[[NUM0:[0-9]+]]=, ${{[0-9]+}}, $pop{{[0-9]+}}{{$}}
484; CHECK-NEXT:   tee_local       $push[[NUM1:[0-9]+]]=, $[[NUM2:[0-9]+]]=, $pop[[NUM0]]{{$}}
485; CHECK-NEXT:   f64.select      $push{{[0-9]+}}=, $pop{{[0-9]+}}, $pop[[NUM1]], ${{[0-9]+}}{{$}}
486; CHECK:        $[[NUM2]]=,
487; NOREGS-LABEL: multiple_defs:
488; NOREGS:        f64.add
489; NOREGS:        tee_local
490; NOREGS:        f64.select
; Two nested loops over double-typed phi nodes. %bb5 is the outer loop header
; (re-entered from %bb21) and %bb8 is the inner loop header (re-entered from
; %bb17). %arg and the function result are unused; only the expected output
; for the generated code matters.
491define void @multiple_defs(i32 %arg, i32 %arg1, i1 %arg2, i1 %arg3, i1 %arg4) {
492bb:
493  br label %bb5
494
495bb5:                                              ; preds = %bb21, %bb
496  %tmp = phi double [ 0.000000e+00, %bb ], [ %tmp22, %bb21 ]
497  %tmp6 = phi double [ 0.000000e+00, %bb ], [ %tmp23, %bb21 ]
498  %tmp7 = fcmp olt double %tmp6, 2.323450e+01
499  br i1 %tmp7, label %bb8, label %bb21
500
501bb8:                                              ; preds = %bb17, %bb5
502  %tmp9 = phi double [ %tmp19, %bb17 ], [ %tmp, %bb5 ]
503  %tmp10 = fadd double %tmp6, -1.000000e+00
504  %tmp11 = select i1 %arg2, double -1.135357e+04, double %tmp10
505  %tmp12 = fadd double %tmp11, %tmp9
506  br i1 %arg3, label %bb17, label %bb13
507
508bb13:                                             ; preds = %bb8
509  %tmp14 = or i32 %arg1, 2
510  %tmp15 = icmp eq i32 %tmp14, 14
511  %tmp16 = select i1 %tmp15, double -1.135357e+04, double 0xBFCE147AE147B000
512  br label %bb17
513
514bb17:                                             ; preds = %bb13, %bb8
515  %tmp18 = phi double [ %tmp16, %bb13 ], [ %tmp10, %bb8 ]
516  %tmp19 = fadd double %tmp18, %tmp12
517  %tmp20 = fcmp olt double %tmp6, 2.323450e+01
518  br i1 %tmp20, label %bb8, label %bb21
519
520bb21:                                             ; preds = %bb17, %bb5
521  %tmp22 = phi double [ %tmp, %bb5 ], [ %tmp9, %bb17 ]
522  %tmp23 = fadd double %tmp6, 1.000000e+00
523  br i1 %arg4, label %exit, label %bb5
524exit:
525  ret void
526}
527
528; Don't move calls past loads
529; CHECK-LABEL: no_stackify_call_past_load:
530; CHECK: i32.call $0=, red
531; CHECK: i32.const $push0=, 0
532; CHECK: i32.load $1=, count($pop0)
533; NOREGS-LABEL: no_stackify_call_past_load:
534; NOREGS: i32.call red
535; NOREGS: i32.const 0
536; NOREGS: i32.load count
; Calls @red, then loads the global @count, then passes the @red result to
; @callee (whose own result is discarded), and finally returns the loaded
; value.
537@count = hidden global i32 0, align 4
538define i32 @no_stackify_call_past_load() {
539  %a = call i32 @red()
540  %b = load i32, i32* @count, align 4
541  call i32 @callee(i32 %a)
542  ret i32 %b
543  ; the call to @callee above is the use of %a
544}
545
546; Don't move stores past loads if there may be aliasing
547; CHECK-LABEL: no_stackify_store_past_load
548; CHECK: i32.store 0($1), $0
549; CHECK: i32.load {{.*}}, 0($2)
550; CHECK: i32.call {{.*}}, callee@FUNCTION, $0{{$}}
551; NOREGS-LABEL: no_stackify_store_past_load
552; NOREGS: i32.store 0
553; NOREGS: i32.load 0
554; NOREGS: i32.call callee@FUNCTION{{$}}
; Stores %a through %p1, loads through %p2 (no invariant metadata and nothing
; stating that %p1 and %p2 differ), calls @callee with %a, and returns the
; loaded value.
555define i32 @no_stackify_store_past_load(i32 %a, i32* %p1, i32* %p2) {
556  store i32 %a, i32* %p1
557  %b = load i32, i32* %p2, align 4
558  call i32 @callee(i32 %a)
559  ret i32 %b
560}
561
562; Can still stackify past invariant loads.
563; CHECK-LABEL: store_past_invar_load
564; CHECK: i32.store 0($1), $0
565; CHECK: i32.call {{.*}}, callee@FUNCTION, $0
566; CHECK: i32.load $push{{.*}}, 0($2)
567; CHECK: return $pop
568; NOREGS-LABEL: store_past_invar_load
569; NOREGS: i32.store 0
570; NOREGS: i32.call callee@FUNCTION
571; NOREGS: i32.load 0
572; NOREGS: return
; Same store / load / call / return sequence, except that the load is tagged
; !invariant.load and %p2 is dereferenceable(4). The expected output places
; the load after the call.
573define i32 @store_past_invar_load(i32 %a, i32* %p1, i32* dereferenceable(4) %p2) {
574  store i32 %a, i32* %p1
575  %b = load i32, i32* %p2, !invariant.load !0
576  call i32 @callee(i32 %a)
577  ret i32 %b
578}
579
580; CHECK-LABEL: ignore_dbg_value:
581; CHECK-NEXT: .Lfunc_begin
582; CHECK-NEXT: unreachable
583; NOREGS-LABEL: ignore_dbg_value:
584; NOREGS-NEXT: .Lfunc_begin
585; NOREGS-NEXT: unreachable
; The function body is only a llvm.dbg.value intrinsic call followed by
; unreachable. The expected output has nothing but the function-begin label
; and the unreachable instruction.
586declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
587define void @ignore_dbg_value() {
588  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10
589  unreachable
590}
591
592; Don't stackify an expression that might use the stack into a return, since we
593; might insert an epilogue before the return.
594
595; CHECK-LABEL: no_stackify_past_epilogue:
596; CHECK: return ${{[0-9]+}}{{$}}
597; NOREGS-LABEL: no_stackify_past_epilogue:
598; NOREGS: return{{$}}
; Allocates an i32 on the stack, passes its address to @use_memory, and
; returns the call result directly. The expected output returns a register,
; not a popped stack value.
599declare i32 @use_memory(i32*)
600define i32 @no_stackify_past_epilogue() {
601  %x = alloca i32
602  %call = call i32 @use_memory(i32* %x)
603  ret i32 %call
604}
605
606; Stackify a loop induction variable into a loop comparison.
607
608; CHECK-LABEL: stackify_indvar:
609; CHECK:             i32.const   $push[[L5:.+]]=, 1{{$}}
610; CHECK-NEXT:        i32.add     $push[[L4:.+]]=, $[[R0:.+]], $pop[[L5]]{{$}}
611; CHECK-NEXT:        tee_local   $push[[L3:.+]]=, $[[R0]]=, $pop[[L4]]{{$}}
612; CHECK-NEXT:        i32.ne      $push[[L2:.+]]=, $0, $pop[[L3]]{{$}}
613; NOREGS-LABEL: stackify_indvar:
614; NOREGS:             i32.const   1{{$}}
615; NOREGS-NEXT:        i32.add
616; NOREGS-NEXT:        tee_local   2{{$}}
617; NOREGS-NEXT:        i32.ne
; Single-block loop. %tmp4 is the induction variable: it starts at 0, is
; incremented by 1 (%tmp7), and is compared for equality against %tmp to leave
; the loop. The volatile load/add/store on %v is the loop body.
; NOTE(review): attribute group #0 is referenced here but no definition of it
; is visible in this file view -- confirm whether one is intended.
618define void @stackify_indvar(i32 %tmp, i32* %v) #0 {
619bb:
620  br label %bb3
621
622bb3:                                              ; preds = %bb3, %bb
623  %tmp4 = phi i32 [ %tmp7, %bb3 ], [ 0, %bb ]
624  %tmp5 = load volatile i32, i32* %v, align 4
625  %tmp6 = add nsw i32 %tmp5, %tmp4
626  store volatile i32 %tmp6, i32* %v, align 4
627  %tmp7 = add nuw nsw i32 %tmp4, 1
628  %tmp8 = icmp eq i32 %tmp7, %tmp
629  br i1 %tmp8, label %bb10, label %bb3
630
631bb10:                                             ; preds = %bb3
632  ret void
633}
634
635; Don't stackify a call past a __stack_pointer store.
636
637; CHECK-LABEL: stackpointer_dependency:
638; CHECK:      call {{.+}}, stackpointer_callee@FUNCTION,
639; CHECK-NEXT: set_global __stack_pointer@GLOBAL,
640; NOREGS-LABEL: stackpointer_dependency:
641; NOREGS:      call stackpointer_callee@FUNCTION
642; NOREGS:      set_global __stack_pointer
; Passes the unnamed first parameter (%0) and the result of
; llvm.frameaddress(0) to @stackpointer_callee as a tail call and returns the
; call result.
643declare i32 @stackpointer_callee(i8* readnone, i8* readnone)
644declare i8* @llvm.frameaddress(i32)
645define i32 @stackpointer_dependency(i8* readnone) {
646  %2 = tail call i8* @llvm.frameaddress(i32 0)
647  %3 = tail call i32 @stackpointer_callee(i8* %0, i8* %2)
648  ret i32 %3
649}
650
651; Stackify a call_indirect with respect to its ordering
652
653; CHECK-LABEL: call_indirect_stackify:
654; CHECK: i32.load  $push[[L4:.+]]=, 0($0)
655; CHECK-NEXT: tee_local $push[[L3:.+]]=, $0=, $pop[[L4]]
656; CHECK-NEXT: i32.load  $push[[L0:.+]]=, 0($0)
657; CHECK-NEXT: i32.load  $push[[L1:.+]]=, 0($pop[[L0]])
658; CHECK-NEXT: i32.call_indirect $push{{.+}}=, $pop[[L3]], $1, $pop[[L1]]
659; NOREGS-LABEL: call_indirect_stackify:
660; NOREGS: i32.load  0
661; NOREGS-NEXT: tee_local 0
662; NOREGS:      i32.load  0
663; NOREGS-NEXT: i32.load  0
664; NOREGS-NEXT: i32.call_indirect
; Virtual-call-shaped sequence: load the object pointer from %objptr, load a
; table pointer from the start of the object, load the function pointer at
; index 0 of that table, and call it indirectly with (%obj, %arg).
665%class.call_indirect = type { i32 (...)** }
666define i32 @call_indirect_stackify(%class.call_indirect** %objptr, i32 %arg) {
667  %obj = load %class.call_indirect*, %class.call_indirect** %objptr
668  %addr = bitcast %class.call_indirect* %obj to i32(%class.call_indirect*, i32)***
669  %vtable = load i32(%class.call_indirect*, i32)**, i32(%class.call_indirect*, i32)*** %addr
670  %vfn = getelementptr inbounds i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vtable, i32 0
671  %f = load i32(%class.call_indirect*, i32)*, i32(%class.call_indirect*, i32)** %vfn
  ; %obj is used both as the first call argument and, via %addr, to find %f.
672  %ret = call i32 %f(%class.call_indirect* %obj, i32 %arg)
673  ret i32 %ret
674}
675
; Module metadata. !0 is the "Debug Info Version" module flag and is also the
; node referenced by the !invariant.load attachments in the functions above.
; !7, !9 and !10 are the variable, expression and location used by the
; llvm.dbg.value call in @ignore_dbg_value. The numbering skips !4.
676!llvm.module.flags = !{!0}
677!llvm.dbg.cu = !{!1}
678
679!0 = !{i32 2, !"Debug Info Version", i32 3}
680!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang version 3.9.0 (trunk 266005) (llvm/trunk 266105)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
681!2 = !DIFile(filename: "test.c", directory: "/")
682!3 = !{}
683!5 = distinct !DISubprogram(name: "test", scope: !2, file: !2, line: 10, type: !6, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !3)
684!6 = !DISubroutineType(types: !3)
685!7 = !DILocalVariable(name: "nzcnt", scope: !5, file: !2, line: 15, type: !8)
686!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
687!9 = !DIExpression()
688!10 = !DILocation(line: 15, column: 6, scope: !5)
689
690